From: Jim Procter Date: Thu, 23 Jun 2016 13:58:42 +0000 (+0100) Subject: Merge branch 'develop' into merge_JAL-2110 X-Git-Tag: Release_2_10_0~140^2~5^2~36 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d2299844ae932a515a5007f30caf766a2c83ad97;hp=9f06569004895c680687699673abfe462b6a2086;p=jalview.git Merge branch 'develop' into merge_JAL-2110 --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 33a54e8..74066d7 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -24,6 +24,7 @@ import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -1404,31 +1405,103 @@ public class AlignmentUtils * * @param dna * aligned dna sequences - * @param mappings - * from dna to protein - * @param al + * @param dataset + * - throws error if not given a dataset * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) */ public static AlignmentI makeCdsAlignment(SequenceI[] dna, - List mappings, AlignmentI al) + AlignmentI dataset) { + if (dataset.getDataset() != null) + { + throw new Error( + "IMPLEMENTATION ERROR: dataset.getDataset() must be null!"); + } List cdsSeqs = new ArrayList(); + List mappings = dataset.getCodonFrames(); + + /* + * construct CDS sequences from the (cds-to-protein) mappings made earlier; + * this makes it possible to model multiple products from dna (e.g. 
EMBL); + * however it does mean we don't have the EMBL protein_id (a property on + * the CDS features) in order to make the CDS sequence name :-( + */ for (SequenceI seq : dna) { - AlignedCodonFrame cdsMappings = new AlignedCodonFrame(); + SequenceI seqDss = seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence(); List seqMappings = MappingUtils .findMappingsForSequence(seq, mappings); - List alignmentMappings = al.getCodonFrames(); for (AlignedCodonFrame mapping : seqMappings) { - for (Mapping aMapping : mapping.getMappingsFromSequence(seq)) + List mappingsFromSequence = mapping.getMappingsFromSequence(seq); + + for (Mapping aMapping : mappingsFromSequence) { - SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(), - aMapping); + if (aMapping.getMap().getFromRatio() == 1) + { + /* + * not a dna-to-protein mapping (likely dna-to-cds) + */ + continue; + } + + /* + * check for an existing CDS sequence i.e. a 3:1 mapping to + * the dna mapping's product + */ + SequenceI cdsSeq = null; + // TODO better mappings collection data model so we can do + // a table lookup instead of double loops to find mappings + SequenceI proteinProduct = aMapping.getTo(); + for (AlignedCodonFrame acf : MappingUtils + .findMappingsForSequence(proteinProduct, mappings)) + { + for (SequenceToSequenceMapping map : acf.getMappings()) + { + if (map.getMapping().getMap().getFromRatio() == 3 + && proteinProduct == map.getMapping().getTo() + && seqDss != map.getFromSeq()) + { + /* + * found a 3:1 mapping to the protein product which is not + * from the dna sequence...assume it is from the CDS sequence + * TODO mappings data model that brings together related + * dna-cds-protein mappings in one object + */ + cdsSeq = map.getFromSeq(); + } + } + } + if (cdsSeq != null) + { + /* + * mappings are always to dataset sequences so create an aligned + * sequence to own it; add the dataset sequence to the dataset + */ + SequenceI derivedSequence = cdsSeq.deriveSequence(); + 
cdsSeqs.add(derivedSequence); + if (!dataset.getSequences().contains(cdsSeq)) + { + dataset.addSequence(cdsSeq); + } + continue; + } + + /* + * didn't find mapped CDS sequence - construct it and add + * its dataset sequence to the dataset + */ + cdsSeq = makeCdsSequence(seq.getDatasetSequence(), aMapping); + SequenceI cdsSeqDss = cdsSeq.createDatasetSequence(); cdsSeqs.add(cdsSeq); - + if (!dataset.getSequences().contains(cdsSeqDss)) + { + dataset.addSequence(cdsSeqDss); + } + /* * add a mapping from CDS to the (unchanged) mapped to range */ @@ -1437,16 +1510,29 @@ public class AlignmentUtils MapList map = new MapList(cdsRange, aMapping.getMap() .getToRanges(), aMapping.getMap().getFromRatio(), aMapping.getMap().getToRatio()); - cdsMappings.addMap(cdsSeq, aMapping.getTo(), map); + AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); + cdsToProteinMapping.addMap(cdsSeq, proteinProduct, map); + + /* + * guard against duplicating the mapping if repeating this action + */ + if (!mappings.contains(cdsToProteinMapping)) + { + mappings.add(cdsToProteinMapping); + } /* * add another mapping from original 'from' range to CDS */ + AlignedCodonFrame dnaToProteinMapping = new AlignedCodonFrame(); map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1, 1); - cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map); + dnaToProteinMapping.addMap(seq.getDatasetSequence(), cdsSeq, map); + if (!mappings.contains(dnaToProteinMapping)) + { + mappings.add(dnaToProteinMapping); + } - alignmentMappings.add(cdsMappings); /* * transfer any features on dna that overlap the CDS @@ -1456,20 +1542,9 @@ public class AlignmentUtils } } - /* - * add CDS seqs to shared dataset - */ - Alignment dataset = al.getDataset(); - for (SequenceI seq : cdsSeqs) - { - if (!dataset.getSequences().contains(seq.getDatasetSequence())) - { - dataset.addSequence(seq.getDatasetSequence()); - } - } AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs .size()])); - 
cds.setDataset(dataset); + cds.setDataset((Alignment) dataset); return cds; } @@ -1481,7 +1556,7 @@ public class AlignmentUtils * * @param seq * @param mapping - * @return + * @return CDS sequence (as a dataset sequence) */ static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping) { @@ -1513,7 +1588,6 @@ public class AlignmentUtils SequenceI newSeq = new Sequence(seq.getName() + "|" + mapping.getTo().getName(), newSeqChars, 1, newPos); - newSeq.createDatasetSequence(); return newSeq; } @@ -1799,17 +1873,20 @@ public class AlignmentUtils * sort to get sequence features in start position order * - would be better to store in Sequence as a TreeSet or NCList? */ - Arrays.sort(peptide.getSequenceFeatures(), - new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) + if (peptide.getSequenceFeatures() != null) + { + Arrays.sort(peptide.getSequenceFeatures(), + new Comparator() { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) - : c; - } - }); + @Override + public int compare(SequenceFeature o1, SequenceFeature o2) + { + int c = Integer.compare(o1.getBegin(), o2.getBegin()); + return c == 0 ? 
Integer.compare(o1.getEnd(), o2.getEnd()) + : c; + } + }); + } return count; } diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 7e77fc1..0011e39 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -24,23 +24,21 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MapList; -import jalview.ws.SequenceFetcher; +import jalview.ws.SequenceFetcherFactory; import jalview.ws.seqfetcher.ASequenceFetcher; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; -import java.util.Vector; /** - * Functions for cross-referencing sequence databases. user must first specify - * if cross-referencing from protein or dna (set dna==true) + * Functions for cross-referencing sequence databases. * * @author JimP * @@ -48,195 +46,178 @@ import java.util.Vector; public class CrossRef { /* - * A sub-class that ignores Parent attribute when comparing sequence - * features. This avoids 'duplicate' CDS features that only - * differ in their parent Transcript ids. 
+ * the dataset of the alignment for which we are searching for + * cross-references; in some cases we may resolve xrefs by + * searching in the dataset */ - class MySequenceFeature extends SequenceFeature - { - private SequenceFeature feat; + private AlignmentI dataset; - MySequenceFeature(SequenceFeature sf) - { - this.feat = sf; - } + /* + * the sequences for which we are seeking cross-references + */ + private SequenceI[] fromSeqs; - @Override - public boolean equals(Object o) - { - return feat.equals(o, true); - } - } + /** + * matcher built from dataset + */ + SequenceIdMatcher matcher; /** - * Select just the DNA or protein references for a protein or dna sequence - * - * @param fromDna - * if true, select references from DNA (i.e. Protein databases), else - * DNA database references - * @param refs - * a set of references to select from - * @return + * sequences found by cross-ref searches to fromSeqs */ - public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs) - { - return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS - : DBRefSource.DNACODINGDBS); - // could attempt to find other cross - // refs here - ie PDB xrefs - // (not dna, not protein seq) - } + List rseqs; /** - * @param dna - * true if seqs are DNA seqs + * mappings constructed + */ + AlignedCodonFrame cf; + + /** + * Constructor + * * @param seqs - * @return a list of sequence database cross reference source types + * the sequences for which we are seeking cross-references + * @param ds + * the containing alignment dataset (may be searched to resolve + * cross-references) */ - public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs) + public CrossRef(SequenceI[] seqs, AlignmentI ds) { - return findSequenceXrefTypes(dna, seqs, null); + fromSeqs = seqs; + dataset = ds.getDataset() == null ? 
ds : ds.getDataset(); } /** - * Indirect references are references from other sequences from the dataset to - * any of the direct DBRefEntrys on the given sequences. + * Returns a list of distinct database sources for which sequences have either + *
    + *
  • a (dna-to-protein or protein-to-dna) cross-reference
  • + *
  • an indirect cross-reference - a (dna-to-protein or protein-to-dna) + * reference from another sequence in the dataset which has a cross-reference + * to a direct DBRefEntry on the given sequence
  • + *
* * @param dna - * true if seqs are DNA seqs - * @param seqs - * @return a list of sequence database cross reference source types + * - when true, cross-references *from* dna returned. When false, + * cross-references *from* protein are returned + * @return */ - public static String[] findSequenceXrefTypes(boolean dna, - SequenceI[] seqs, AlignmentI dataset) + public List findXrefSourcesForSequences(boolean dna) { - String[] dbrefs = null; - List refs = new ArrayList(); - for (SequenceI seq : seqs) + List sources = new ArrayList(); + for (SequenceI seq : fromSeqs) { if (seq != null) { - SequenceI dss = seq; - while (dss.getDatasetSequence() != null) - { - dss = dss.getDatasetSequence(); - } - DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs()); - if (rfs != null) - { - for (DBRefEntry ref : rfs) - { - if (!refs.contains(ref.getSource())) - { - refs.add(ref.getSource()); - } - } - } - if (dataset != null) - { - // search for references to this sequence's direct references. - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); - List rseqs = new ArrayList(); - CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs, - null); // don't need to specify codon frame for mapping here - for (SequenceI rs : rseqs) - { - DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); - if (xrs != null) - { - for (DBRefEntry ref : xrs) - { - if (!refs.contains(ref.getSource())) - { - refs.add(ref.getSource()); - } - } - } - // looks like copy and paste - change rfs to xrs? - // for (int r = 0; rfs != null && r < rfs.length; r++) - // { - // if (!refs.contains(rfs[r].getSource())) - // { - // refs.add(rfs[r].getSource()); - // } - // } - } - } + findXrefSourcesForSequence(seq, dna, sources); } } - if (refs.size() > 0) - { - dbrefs = new String[refs.size()]; - refs.toArray(dbrefs); - } - return dbrefs; + return sources; } - public static boolean hasCdnaMap(SequenceI[] seqs) + /** + * Returns a list of distinct database sources for which a sequence has either + *
    + *
  • a (dna-to-protein or protein-to-dna) cross-reference
  • + *
  • an indirect cross-reference - a (dna-to-protein or protein-to-dna) + * reference from another sequence in the dataset which has a cross-reference + * to a direct DBRefEntry on the given sequence
  • + *
+ * + * @param seq + * the sequence whose dbrefs we are searching against + * @param fromDna + * when true, context is DNA - so sources identifying protein + * products will be returned. + * @param sources + * a list of sources to add matches to + */ + void findXrefSourcesForSequence(SequenceI seq, boolean fromDna, + List sources) { - // TODO unused - remove? - String[] reftypes = findSequenceXrefTypes(false, seqs); - for (int s = 0; s < reftypes.length; s++) + /* + * first find seq's xrefs (dna-to-peptide or peptide-to-dna) + */ + DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); + addXrefsToSources(rfs, sources); + if (dataset != null) { - if (reftypes.equals(DBRefSource.EMBLCDS)) + /* + * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs + */ + DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); + List rseqs = new ArrayList(); + + /* + * find sequences in the alignment which xref one of these DBRefs + * i.e. is xref-ed to a common sequence identifier + */ + searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null); + + /* + * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources + */ + for (SequenceI rs : rseqs) { - return true; - // no map + DBRefEntry[] xrs = DBRefUtils + .selectDbRefs(!fromDna, rs.getDBRefs()); + addXrefsToSources(xrs, sources); } } - return false; } - public static SequenceI[] getCdnaMap(SequenceI[] seqs) + /** + * Helper method that adds the source identifiers of some cross-references to + * a (non-redundant) list of database sources + * + * @param xrefs + * @param sources + */ + void addXrefsToSources(DBRefEntry[] xrefs, List sources) { - // TODO unused - remove? - Vector cseqs = new Vector(); - for (int s = 0; s < seqs.length; s++) + if (xrefs != null) { - DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs()); - for (int c = 0; c < cdna.length; c++) + for (DBRefEntry ref : xrefs) { - if (cdna[c].getSource().equals(DBRefSource.EMBLCDS)) + /* + * avoid duplication e.g. 
ENSEMBL and Ensembl + */ + String source = DBRefUtils.getCanonicalName(ref.getSource()); + if (!sources.contains(source)) { - System.err - .println("TODO: unimplemented sequence retrieval for coding region sequence."); - // TODO: retrieve CDS dataset sequences - // need global dataset sequence retriever/resolver to reuse refs - // and construct Mapping entry. - // insert gaps in CDS according to peptide gaps. - // add gapped sequence to cseqs + sources.add(source); } } } - if (cseqs.size() > 0) - { - SequenceI[] rsqs = new SequenceI[cseqs.size()]; - cseqs.copyInto(rsqs); - return rsqs; - } - return null; - } /** + * Attempts to find cross-references from the sequences provided in the + * constructor to the given source database. Cross-references may be found + *
    + *
  • in dbrefs on the sequence which hold a mapping to a sequence + *
      + *
    • provided with a fetched sequence (e.g. ENA translation), or
    • + *
    • populated previously after getting cross-references
    • + *
    + *
  • as other sequences in the alignment which share a dbref identifier with + * the sequence
  • + *
  • by fetching from the remote database
  • + *
+ * The cross-referenced sequences, and mappings to them, are added to the + * alignment dataset. * - * @param seqs - * sequences whose xrefs are being retrieved - * @param dna - * true if sequences are nucleotide * @param source - * @param al - * alignment to search for cross-referenced sequences (and possibly - * add to) - * @return products (as dataset sequences) + * @return cross-referenced sequences (as dataset sequences) */ - public static Alignment findXrefSequences(SequenceI[] seqs, - final boolean dna, final String source, AlignmentI al) + public Alignment findXrefSequences(String source, boolean fromDna) { - AlignmentI dataset = al.getDataset() == null ? al : al.getDataset(); - List rseqs = new ArrayList(); - AlignedCodonFrame cf = new AlignedCodonFrame(); - for (SequenceI seq : seqs) + + rseqs = new ArrayList(); + cf = new AlignedCodonFrame(); + matcher = new SequenceIdMatcher( + dataset.getSequences()); + + for (SequenceI seq : fromSeqs) { SequenceI dss = seq; while (dss.getDatasetSequence() != null) @@ -244,35 +225,74 @@ public class CrossRef dss = dss.getDatasetSequence(); } boolean found = false; - DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs()); + DBRefEntry[] xrfs = DBRefUtils + .selectDbRefs(!fromDna, dss.getDBRefs()); if ((xrfs == null || xrfs.length == 0) && dataset != null) { - System.out.println("Attempting to find ds Xrefs refs."); - // FIXME should be dss not seq here? - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); - // less ambiguous would be a 'find primary dbRefEntry' method. 
- // filter for desired source xref here - found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, - rseqs, cf); + /* + * found no suitable dbrefs on sequence - look for sequences in the + * alignment which share a dbref with this one + */ + DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, + seq.getDBRefs()); + + /* + * find sequences (except this one!), of complementary type, + * which have a dbref to an accession id for this sequence, + * and add them to the results + */ + found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); } - for (int r = 0; xrfs != null && r < xrfs.length; r++) + if (xrfs == null && !found) { - DBRefEntry xref = xrfs[r]; - if (source != null && !source.equals(xref.getSource())) - { - continue; - } + /* + * no dbref to source on this sequence or matched + * complementary sequence in the dataset + */ + continue; + } + List sourceRefs = DBRefUtils.searchRefsForSource(xrfs, + source); + Iterator refIterator = sourceRefs.iterator(); + while (refIterator.hasNext()) + { + DBRefEntry xref = refIterator.next(); + found = false; if (xref.hasMap()) { - if (xref.getMap().getTo() != null) + SequenceI mappedTo = xref.getMap().getTo(); + if (mappedTo != null) { - SequenceI rsq = new Sequence(xref.getMap().getTo()); + /* + * dbref contains the sequence it maps to; add it to the + * results unless we have done so already (could happen if + * fetching xrefs for sequences which have xrefs in common) + * for example: UNIPROT {P0CE19, P0CE20} -> EMBL {J03321, X06707} + */ + found = true; + /* + * problem: matcher.findIdMatch() is lenient - returns a sequence + * with a dbref to the search arg e.g. ENST for ENSP - wrong + * but findInDataset() matches ENSP when looking for Uniprot... 
+ */ + SequenceI matchInDataset = findInDataset(xref); + /*matcher.findIdMatch(mappedTo);*/ + if (matchInDataset != null) + { + if (!rseqs.contains(matchInDataset)) + { + rseqs.add(matchInDataset); + } + refIterator.remove(); + continue; + } + SequenceI rsq = new Sequence(mappedTo); rseqs.add(rsq); - if (xref.getMap().getMap().getFromRatio() != xref - .getMap().getMap().getToRatio()) + if (xref.getMap().getMap().getFromRatio() != xref.getMap() + .getMap().getToRatio()) { // get sense of map correct for adding to product alignment. - if (dna) + if (fromDna) { // map is from dna seq to a protein product cf.addMap(dss, rsq, xref.getMap().getMap()); @@ -283,203 +303,308 @@ public class CrossRef cf.addMap(rsq, dss, xref.getMap().getMap().getInverse()); } } - found = true; } } + if (!found) { - // do a bit more work - search for sequences with references matching - // xrefs on this sequence. - if (dataset != null) + SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|" + + xref.getAccessionId()); + if (matchedSeq != null) { - found |= searchDataset(dss, xref, dataset, rseqs, cf, false, - !dna); - if (found) + if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) { - xrfs[r] = null; // we've recovered seqs for this one. + found = true; } } } + + if (!found) + { + // do a bit more work - search for sequences with references matching + // xrefs on this sequence. 
+ found = searchDataset(fromDna, dss, xref, rseqs, cf, false); + } + if (found) + { + refIterator.remove(); + } + } + + /* + * fetch from source database any dbrefs we haven't resolved up to here + */ + if (!sourceRefs.isEmpty()) + { + retrieveCrossRef(sourceRefs, seq, xrfs, fromDna); } - if (!found) + } + + Alignment ral = null; + if (rseqs.size() > 0) + { + ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); + if (!cf.isEmpty()) { - if (xrfs != null && xrfs.length > 0) + dataset.addCodonFrame(cf); + } + } + return ral; + } + + private void retrieveCrossRef(List sourceRefs, SequenceI seq, + DBRefEntry[] xrfs, boolean fromDna) + { + ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); + SequenceI[] retrieved = null; + SequenceI dss = null; + try + { + retrieved = sftch.getSequences(sourceRefs, !fromDna); + } catch (Exception e) + { + System.err + .println("Problem whilst retrieving cross references for Sequence : " + + seq.getName()); + e.printStackTrace(); + } + + if (retrieved != null) + { + updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); + for (SequenceI retrievedSequence : retrieved) + { + // dataset gets contaminated ccwith non-ds sequences. why ??! + // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL-> + SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence + : retrievedSequence.getDatasetSequence(); + DBRefEntry[] dbr = retrievedSequence.getDBRefs(); + if (dbr != null) { - // Try and get the sequence reference... 
- /* - * Ideal world - we ask for a sequence fetcher implementation here if - * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) ( - */ - ASequenceFetcher sftch = new SequenceFetcher(); - SequenceI[] retrieved = null; - int l = xrfs.length; - for (int r = 0; r < xrfs.length; r++) + for (DBRefEntry dbref : dbr) { - // filter out any irrelevant or irretrievable references - if (xrfs[r] == null - || ((source != null && !source.equals(xrfs[r] - .getSource())) || !sftch.isFetchable(xrfs[r] - .getSource()))) + // find any entry where we should put in the sequence being + // cross-referenced into the map + Mapping map = dbref.getMap(); + if (map != null) { - l--; - xrfs[r] = null; - } - } - if (l > 0) - { - // System.out - // .println("Attempting to retrieve cross referenced sequences."); - DBRefEntry[] t = new DBRefEntry[l]; - l = 0; - for (int r = 0; r < xrfs.length; r++) - { - if (xrfs[r] != null) - { - t[l++] = xrfs[r]; - } - } - xrfs = t; - try - { - retrieved = sftch.getSequences(xrfs, !dna); - // problem here is we don't know which of xrfs resulted in which - // retrieved element - } catch (Exception e) - { - System.err - .println("Problem whilst retrieving cross references for Sequence : " - + seq.getName()); - e.printStackTrace(); - } - - if (retrieved != null) - { - updateDbrefMappings(dna, seq, xrfs, retrieved, cf); - - SequenceIdMatcher matcher = new SequenceIdMatcher( - dataset.getSequences()); - List copiedFeatures = new ArrayList(); - CrossRef me = new CrossRef(); - for (int rs = 0; rs < retrieved.length; rs++) + if (map.getTo() != null && map.getMap() != null) { - // TODO: examine each sequence for 'redundancy' - DBRefEntry[] dbr = retrieved[rs].getDBRefs(); - if (dbr != null && dbr.length > 0) + // TODO findInDataset requires exact sequence match but + // 'congruent' test is only for the mapped part + // maybe not a problem in practice since only ENA provide a + // mapping and it is to the full protein translation of CDS + SequenceI matched = 
findInDataset(dbref); + // matcher.findIdMatch(map.getTo()); + if (matched != null) { - for (int di = 0; di < dbr.length; di++) + /* + * already got an xref to this sequence; update this + * map to point to the same sequence, and add + * any new dbrefs to it + */ + DBRefEntry[] toRefs = map.getTo().getDBRefs(); + if (toRefs != null) { - // find any entry where we should put in the sequence being - // cross-referenced into the map - Mapping map = dbr[di].getMap(); - if (map != null) + for (DBRefEntry ref : toRefs) { - if (map.getTo() != null && map.getMap() != null) + matched.addDBRef(ref); // add or update mapping + } + } + map.setTo(matched); + } + else + { + matcher.add(map.getTo()); + } + try + { + // compare ms with dss and replace with dss in mapping + // if map is congruent + SequenceI ms = map.getTo(); + int sf = map.getMap().getToLowest(); + int st = map.getMap().getToHighest(); + SequenceI mappedrg = ms.getSubSequence(sf, st); + // SequenceI loc = dss.getSubSequence(sf, st); + if (mappedrg.getLength() > 0 + && ms.getSequenceAsString().equals( + dss.getSequenceAsString())) + // && mappedrg.getSequenceAsString().equals( + // loc.getSequenceAsString())) + { + String msg = "Mapping updated from " + ms.getName() + + " to retrieved crossreference " + + dss.getName(); + System.out.println(msg); + map.setTo(dss); + + /* + * give the reverse reference the inverse mapping + * (if it doesn't have one already) + */ + setReverseMapping(dss, dbref, cf); + + /* + * copy sequence features as well, avoiding + * duplication (e.g. 
same variation from two + * transcripts) + */ + SequenceFeature[] sfs = ms.getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature feat : sfs) { - SequenceI matched = matcher - .findIdMatch(map.getTo()); - if (matched != null) - { - /* - * already got an xref to this sequence; update this - * map to point to the same sequence, and add - * any new dbrefs to it - */ - for (DBRefEntry ref : map.getTo().getDBRefs()) - { - matched.addDBRef(ref); // add or update mapping - } - map.setTo(matched); - } - else - { - matcher.add(map.getTo()); - } - try + /* + * make a flyweight feature object which ignores Parent + * attribute in equality test; this avoids creating many + * otherwise duplicate exon features on genomic sequence + */ + SequenceFeature newFeature = new SequenceFeature( + feat) { - // compare ms with dss and replace with dss in mapping - // if map is congruent - SequenceI ms = map.getTo(); - int sf = map.getMap().getToLowest(); - int st = map.getMap().getToHighest(); - SequenceI mappedrg = ms.getSubSequence(sf, st); - // SequenceI loc = dss.getSubSequence(sf, st); - if (mappedrg.getLength() > 0 - && ms.getSequenceAsString().equals( - dss.getSequenceAsString())) - // && mappedrg.getSequenceAsString().equals( - // loc.getSequenceAsString())) - { - String msg = "Mapping updated from " - + ms.getName() - + " to retrieved crossreference " - + dss.getName(); - System.out.println(msg); - // method to update all refs of existing To on - // retrieved sequence with dss and merge any props - // on To onto dss. - map.setTo(dss); - /* - * copy sequence features as well, avoiding - * duplication (e.g. same variation from 2 - * transcripts) - */ - SequenceFeature[] sfs = ms - .getSequenceFeatures(); - if (sfs != null) - { - for (SequenceFeature feat : sfs) - { - /* - * we override SequenceFeature.equals here (but - * not elsewhere) to ignore Parent attribute - * TODO not quite working yet! 
- */ - if (!copiedFeatures - .contains(me.new MySequenceFeature( - feat))) - { - dss.addSequenceFeature(feat); - copiedFeatures.add(feat); - } - } - } - cf.addMap(retrieved[rs].getDatasetSequence(), - dss, map.getMap()); - } - else + @Override + public boolean equals(Object o) { - cf.addMap(retrieved[rs].getDatasetSequence(), - map.getTo(), map.getMap()); + return super.equals(o, true); } - } catch (Exception e) - { - System.err - .println("Exception when consolidating Mapped sequence set..."); - e.printStackTrace(System.err); - } + }; + dss.addSequenceFeature(newFeature); } } } + cf.addMap(retrievedDss, map.getTo(), map.getMap()); + } catch (Exception e) + { + System.err + .println("Exception when consolidating Mapped sequence set..."); + e.printStackTrace(System.err); } - retrieved[rs].updatePDBIds(); - rseqs.add(retrieved[rs]); } } } } + retrievedSequence.updatePDBIds(); + rseqs.add(retrievedDss); + dataset.addSequence(retrievedDss); + matcher.add(retrievedDss); + } + } + } + /** + * Sets the inverse sequence mapping in the corresponding dbref of the mapped + * to sequence (if any). This is used after fetching a cross-referenced + * sequence, if the fetched sequence has a mapping to the original sequence, + * to set the mapping in the original sequence's dbref. 
+ * + * @param mapFrom + * the sequence mapped from + * @param dbref + * @param mappings + */ + void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref, + AlignedCodonFrame mappings) + { + SequenceI mapTo = dbref.getMap().getTo(); + if (mapTo == null) + { + return; + } + DBRefEntry[] dbrefs = mapTo.getDBRefs(); + if (dbrefs == null) + { + return; + } + for (DBRefEntry toRef : dbrefs) + { + if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) + { + /* + * found the reverse dbref; update its mapping if null + */ + if (toRef.getMap().getMap() == null) + { + MapList inverse = dbref.getMap().getMap().getInverse(); + toRef.getMap().setMap(inverse); + mappings.addMap(mapTo, mapFrom, inverse); + } } } + } - Alignment ral = null; - if (rseqs.size() > 0) + /** + * Returns the first identical sequence in the dataset if any, else null + * + * @param xref + * @return + */ + SequenceI findInDataset(DBRefEntry xref) + { + if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) { - ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); - if (cf != null && !cf.isEmpty()) + return null; + } + SequenceI mapsTo = xref.getMap().getTo(); + String name = xref.getAccessionId(); + String name2 = xref.getSource() + "|" + name; + SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo + .getDatasetSequence(); + for (SequenceI seq : dataset.getSequences()) + { + /* + * clumsy alternative to using SequenceIdMatcher which currently + * returns sequences with a dbref to the matched accession id + * which we don't want + */ + if (name.equals(seq.getName()) || seq.getName().startsWith(name2)) { - ral.addCodonFrame(cf); + if (sameSequence(seq, dss)) + { + return seq; + } } } - return ral; + return null; + } + + /** + * Answers true if seq1 and seq2 contain exactly the same characters (ignoring + * case), else false. This method compares the lengths, then each character in + * turn, in order to 'fail fast'. 
For case-sensitive comparison, it would be + * possible to use Arrays.equals(seq1.getSequence(), seq2.getSequence()). + * + * @param seq1 + * @param seq2 + * @return + */ + // TODO move to Sequence / SequenceI + static boolean sameSequence(SequenceI seq1, SequenceI seq2) + { + if (seq1 == seq2) + { + return true; + } + if (seq1 == null || seq2 == null) + { + return false; + } + char[] c1 = seq1.getSequence(); + char[] c2 = seq2.getSequence(); + if (c1.length != c2.length) + { + return false; + } + for (int i = 0; i < c1.length; i++) + { + int diff = c1[i] - c2[i]; + /* + * same char or differ in case only ('a'-'A' == 32) + */ + if (diff != 0 && diff != 32 && diff != -32) + { + return false; + } + } + return true; } /** @@ -487,14 +612,13 @@ public class CrossRef * retrieved sequence if found, and adds any new mappings to the * AlignedCodonFrame * - * @param dna * @param mapFrom * @param xrefs * @param retrieved * @param acf */ - static void updateDbrefMappings(boolean dna, SequenceI mapFrom, - DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf) + void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs, + SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) { SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved); for (DBRefEntry xref : xrefs) @@ -510,55 +634,104 @@ public class CrossRef } for (SequenceI seq : matches) { - MapList mapping = null; - if (dna) - { - mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom); - } - else - { - mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq); - if (mapping != null) - { - mapping = mapping.getInverse(); - } - } - if (mapping != null) - { - xref.setMap(new Mapping(seq, mapping)); - if (dna) - { - AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping); - } - if (dna) - { - acf.addMap(mapFrom, seq, mapping); - } - else - { - acf.addMap(seq, mapFrom, mapping.getInverse()); - } - continue; - } + constructMapping(mapFrom, seq, xref, acf, fromDna); } } } } /** + * Tries to make a 
mapping between sequences. If successful, adds the mapping + * to the dbref and the mappings collection and answers true, otherwise + * answers false. The following methods of making a mapping are tried in + * turn: + *
    + *
  • if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for + * example, the case after fetching EMBL cross-references for a Uniprot + * sequence
  • + *
  • else check if the dna translates exactly to the protein (give or take + * start and stop codons)
  • + *
  • else try to map based on CDS features on the dna sequence
  • + *
+ * + * @param mapFrom + * @param mapTo + * @param xref + * @param mappings + * @return + */ + boolean constructMapping(SequenceI mapFrom, SequenceI mapTo, + DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) + { + MapList mapping = null; + + /* + * look for a reverse mapping, if found make its inverse + */ + if (mapTo.getDBRefs() != null) + { + for (DBRefEntry dbref : mapTo.getDBRefs()) + { + String name = dbref.getSource() + "|" + dbref.getAccessionId(); + if (dbref.hasMap() && mapFrom.getName().startsWith(name)) + { + /* + * looks like we've found a map from 'mapTo' to 'mapFrom' + * - invert it to make the mapping the other way + */ + MapList reverse = dbref.getMap().getMap().getInverse(); + xref.setMap(new Mapping(mapTo, reverse)); + mappings.addMap(mapFrom, mapTo, reverse); + return true; + } + } + } + + if (fromDna) + { + mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); + } + else + { + mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo); + if (mapping != null) + { + mapping = mapping.getInverse(); + } + } + if (mapping == null) + { + return false; + } + xref.setMap(new Mapping(mapTo, mapping)); + if (fromDna) + { + AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); + mappings.addMap(mapFrom, mapTo, mapping); + } + else + { + mappings.addMap(mapTo, mapFrom, mapping.getInverse()); + } + + return true; + } + + /** * find references to lrfs in the cross-reference set of each sequence in * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry * based on source and accession string only - Map and Version are nulled. * + * @param fromDna + * - true if context was searching from Dna sequences, false if + * context was searching from Protein sequences * @param sequenceI * @param lrfs - * @param dataset * @param rseqs * @return true if matches were found. 
*/ - private static boolean searchDatasetXrefs(SequenceI sequenceI, - boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, - List rseqs, AlignedCodonFrame cf) + private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, + DBRefEntry[] lrfs, List rseqs, AlignedCodonFrame cf) { boolean found = false; if (lrfs == null) @@ -571,50 +744,44 @@ public class CrossRef // add in wildcards xref.setVersion(null); xref.setMap(null); - found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna); + found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false); } return found; } /** - * search a given sequence dataset for references matching cross-references to - * the given sequence + * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the + * associated sequence to rseqs * + * @param fromDna + * true if context was searching for refs *from* dna sequence, false + * if context was searching for refs *from* protein sequence * @param sequenceI + * a sequence to ignore (start point of search) * @param xrf - * @param dataset + * a cross-reference to try to match * @param rseqs - * set of unique sequences + * result list to add to * @param cf - * @return true if one or more unique sequences were found and added - */ - public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, - AlignmentI dataset, List rseqs, AlignedCodonFrame cf) - { - return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false); - } - - /** - * TODO: generalise to different protein classifications Search dataset for - * DBRefEntrys matching the given one (xrf) and add the associated sequence to - * rseq. 
- * - * @param sequenceI - * @param xrf - * @param dataset - * @param rseqs + * a set of sequence mappings to add to * @param direct - * - search all references or only subset - * @param dna - * search dna or protein xrefs (if direct=false) + * - indicates the type of relationship between returned sequences, + * xrf, and sequenceI that is required. + *
    + *
  • direct implies xrf is a primary reference for sequenceI AND + * the sequences to be located (eg a uniprot ID for a protein + * sequence, and a uniprot ref on a transcript sequence).
  • + *
  • indirect means xrf is a cross reference with respect to + * sequenceI or all the returned sequences (eg a genomic reference + * associated with a locus and one or more transcripts)
  • + *
* @return true if relationship found and sequence added. */ - public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, - AlignmentI dataset, List rseqs, AlignedCodonFrame cf, - boolean direct, boolean dna) + boolean searchDataset(boolean fromDna, SequenceI sequenceI, + DBRefEntry xrf, List rseqs, AlignedCodonFrame cf, + boolean direct) { boolean found = false; - SequenceI[] typer = new SequenceI[1]; if (dataset == null) { return false; @@ -634,107 +801,82 @@ public class CrossRef if (nxt.getDatasetSequence() != null) { System.err - .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!"); + .println("Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" + + nxt.getDisplayId(true) + + " has ds reference " + + nxt.getDatasetSequence().getDisplayId(true) + + ")"); + } + if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence()) + { + continue; } - if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence()) + /* + * only look at same molecule type if 'direct', or + * complementary type if !direct + */ { - // check if this is the correct sequence type + boolean isDna = !nxt.isProtein(); + if (direct ? 
(isDna != fromDna) : (isDna == fromDna)) { - typer[0] = nxt; - boolean isDna = jalview.util.Comparison.isNucleotide(typer); - if ((direct && isDna == dna) || (!direct && isDna != dna)) - { - // skip this sequence because it is same molecule type - continue; - } + // skip this sequence because it is wrong molecule type + continue; } + } - // look for direct or indirect references in common - DBRefEntry[] poss = nxt.getDBRefs(), cands = null; - if (direct) - { - cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); - } - else - { - poss = CrossRef.findXDbRefs(dna, poss); // - cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); - } - if (cands != null) + // look for direct or indirect references in common + DBRefEntry[] poss = nxt.getDBRefs(); + List cands = null; + + // todo: indirect specifies we select either direct references to nxt + // that match xrf which is indirect to sequenceI, or indirect + // references to nxt that match xrf which is direct to sequenceI + cands = DBRefUtils.searchRefs(poss, xrf); + // else + // { + // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss); + // cands = DBRefUtils.searchRefs(poss, xrf); + // } + if (!cands.isEmpty()) + { + if (!rseqs.contains(nxt)) { - if (!rseqs.contains(nxt)) + found = true; + rseqs.add(nxt); + if (cf != null) { - rseqs.add(nxt); - boolean foundmap = cf != null; // don't search if we aren't given a codon map object - for (int r = 0; foundmap && r < cands.length; r++) + for (DBRefEntry candidate : cands) { - if (cands[r].hasMap()) + Mapping mapping = candidate.getMap(); + if (mapping != null) { - if (cands[r].getMap().getTo() != null - && cands[r].getMap().getMap().getFromRatio() != cands[r] - .getMap().getMap().getToRatio()) + MapList map = mapping.getMap(); + if (mapping.getTo() != null + && map.getFromRatio() != map.getToRatio()) { - foundmap = true; // get sense of map correct for adding to product // alignment. 
- if (dna) + if (fromDna) { // map is from dna seq to a protein product - cf.addMap(sequenceI, nxt, cands[r].getMap() - .getMap()); + cf.addMap(sequenceI, nxt, map); } else { // map should be from protein seq to its coding dna - cf.addMap(nxt, sequenceI, cands[r].getMap() - .getMap().getInverse()); + cf.addMap(nxt, sequenceI, map.getInverse()); } } } } - // TODO: add mapping between sequences if necessary - found = true; } + // TODO: add mapping between sequences if necessary } - } } } } return found; } - - /** - * precalculate different products that can be found for seqs in dataset and - * return them. - * - * @param dna - * @param seqs - * @param dataset - * @param fake - * - don't actually build lists - just get types - * @return public static Object[] buildXProductsList(boolean dna, SequenceI[] - * seqs, AlignmentI dataset, boolean fake) { String types[] = - * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs, - * dataset); if (types != null) { System.out.println("Xref Types for: - * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) { - * System.out.println("Type: " + types[t]); SequenceI[] prod = - * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]); - * System.out.println("Found " + ((prod == null) ? "no" : "" + - * prod.length) + " products"); if (prod!=null) { for (int p=0; - * p -1)) : false; } } + + /** + * toString method returns the wrapped sequence id. For debugging purposes + * only, behaviour not guaranteed not to change. 
+ */ + @Override + public String toString() + { + return id; + } } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 6d6cdb5..a4b99bf 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -23,6 +23,7 @@ package jalview.datamodel; import jalview.util.MapList; import jalview.util.MappingUtils; +import java.util.AbstractList; import java.util.ArrayList; import java.util.List; @@ -36,7 +37,7 @@ public class AlignedCodonFrame /* * Data bean to hold mappings from one sequence to another */ - private class SequenceToSequenceMapping + public class SequenceToSequenceMapping { private SequenceI fromSeq; @@ -57,6 +58,54 @@ public class AlignedCodonFrame return String.format("From %s %s", fromSeq.getName(), mapping.toString()); } + + /** + * Returns a hashCode derived from the hashcodes of the mappings and fromSeq + * + * @see SequenceToSequenceMapping#hashCode() + */ + @Override + public int hashCode() + { + return (fromSeq == null ? 
0 : fromSeq.hashCode() * 31) + + mapping.hashCode(); + } + + /** + * Answers true if the objects hold the same mapping between the same two + * sequences + * + * @see Mapping#equals + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof SequenceToSequenceMapping)) + { + return false; + } + SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj; + if (this.mapping == null) + { + return that.mapping == null; + } + // TODO: can simplify by asserting fromSeq is a dataset sequence + return (this.fromSeq == that.fromSeq || (this.fromSeq != null + && that.fromSeq != null + && this.fromSeq.getDatasetSequence() != null && this.fromSeq + .getDatasetSequence() == that.fromSeq + .getDatasetSequence())) && this.mapping.equals(that.mapping); + } + + public SequenceI getFromSeq() + { + return fromSeq; + } + + public Mapping getMapping() + { + return mapping; + } } private List mappings; @@ -90,6 +139,8 @@ public class AlignedCodonFrame /* * if we already hold a mapping between these sequences, just add to it + * note that 'adding' a duplicate map does nothing; this protects against + * creating duplicate mappings in AlignedCodonFrame */ for (SequenceToSequenceMapping ssm : mappings) { @@ -674,4 +725,37 @@ public class AlignedCodonFrame } return null; } + + /** + * Returns a hashcode derived from the list of sequence mappings + * + * @see SequenceToSequenceMapping#hashCode() + * @see AbstractList#hashCode() + */ + @Override + public int hashCode() + { + return this.mappings.hashCode(); + } + + /** + * Two AlignedCodonFrame objects are equal if they hold the same ordered list + * of mappings + * + * @see SequenceToSequenceMapping# + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof AlignedCodonFrame)) + { + return false; + } + return this.mappings.equals(((AlignedCodonFrame) obj).mappings); + } + + public List getMappings() + { + return mappings; + } } diff --git a/src/jalview/datamodel/Alignment.java 
b/src/jalview/datamodel/Alignment.java index f14539b..c9ec77b 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -45,7 +45,7 @@ import java.util.Vector; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; protected List sequences; @@ -110,7 +110,10 @@ public class Alignment implements AlignmentI /* * Share the same dataset sequence mappings (if any). */ - this.setCodonFrames(al.getCodonFrames()); + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); + } } /** @@ -987,7 +990,7 @@ public class Alignment implements AlignmentI } @Override - public void setDataset(Alignment data) + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { @@ -995,7 +998,12 @@ public class Alignment implements AlignmentI } else if (dataset == null && data != null) { - dataset = data; + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { SequenceI currentSeq = getSequenceAt(i); @@ -1357,6 +1365,10 @@ public class Alignment implements AlignmentI @Override public List getCodonFrames() { + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset return dataset != null ? 
dataset.getCodonFrames() : codonFrameList; } @@ -1418,6 +1430,7 @@ public class Alignment implements AlignmentI addAnnotation(alan[a]); } + // use add method getCodonFrames().addAll(toappend.getCodonFrames()); List sg = toappend.getGroups(); diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index 4ae8ba2..f1238ff 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -305,7 +305,7 @@ public interface AlignmentI extends AnnotatedCollectionI * @return Alignment containing dataset sequences or null of this is a * dataset. */ - Alignment getDataset(); + AlignmentI getDataset(); /** * Set the associated dataset for the alignment, or create one. @@ -313,7 +313,7 @@ public interface AlignmentI extends AnnotatedCollectionI * @param dataset * The dataset alignment or null to construct one. */ - void setDataset(Alignment dataset); + void setDataset(AlignmentI dataset); /** * pads sequences with gaps (to ensure the set looks like an alignment) diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index 66a075e..efdf0ac 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -150,6 +150,7 @@ public class DBRefEntry implements DBRefEntryI * otherwise the versions have to match */ String otherVersion = other.getVersion(); + if ((version == null || version.equals("0") || version.endsWith(":0")) && otherVersion != null) { @@ -157,7 +158,9 @@ public class DBRefEntry implements DBRefEntryI } else { - if (!version.equalsIgnoreCase(otherVersion)) + if (version != null + && (otherVersion == null || !version + .equalsIgnoreCase(otherVersion))) { return false; } diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index bd83fe9..b4489e2 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -356,14 +356,13 @@ public class Mapping /** * Equals that compares 
both the to references and MapList mappings. * - * @param other + * @param o * @return + * @see MapList#equals */ @Override public boolean equals(Object o) { - // TODO should override Object.hashCode() to ensure that equal objects have - // equal hashcodes if (o == null || !(o instanceof Mapping)) { return false; @@ -390,6 +389,21 @@ public class Mapping } /** + * Returns a hashCode made from the sequence and maplist + */ + @Override + public int hashCode() + { + int hashCode = (this.to == null ? 1 : this.to.hashCode()); + if (this.map != null) + { + hashCode = hashCode * 31 + this.map.hashCode(); + } + + return hashCode; + } + + /** * get the 'initial' position in the associated sequence for a position in the * mapped reference frame * diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 151d8c4..31ffdfd 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -1086,6 +1086,25 @@ public class Sequence extends ASequence implements SequenceI return new Sequence(this); } + private boolean _isNa; + + private long _seqhash = 0; + + @Override + public boolean isProtein() + { + if (datasetSequence != null) + { + return datasetSequence.isProtein(); + } + if (_seqhash != sequence.hashCode()) + { + _seqhash = sequence.hashCode(); + _isNa=jalview.util.Comparison.isNucleotide(new SequenceI[] { this }); + } + return !_isNa; + }; + /* * (non-Javadoc) * diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 69eb1d4..355e271 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -219,6 +219,12 @@ public interface SequenceI extends ASequenceI public int[] findPositionMap(); /** + * + * @return true if sequence is composed of amino acid characters + */ + public boolean isProtein(); + + /** * Delete a range of aligned sequence columns, creating a new dataset sequence * if necessary and adjusting start and end positions 
accordingly. * diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index f8c0bbe..5409d5b 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -198,10 +198,15 @@ public class EmblEntry retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1, 1)); + + /* + * transform EMBL Database refs to canonical form + */ if (dbRefs != null) { for (DBRefEntry dbref : dbRefs) { + dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource())); dna.addDBRef(dbref); } } @@ -211,13 +216,6 @@ public class EmblEntry { for (EmblFeature feature : features) { - if (feature.dbRefs != null) - { - for (DBRefEntry dbref : feature.dbRefs) - { - dna.addDBRef(dbref); - } - } if (FeatureProperties.isCodingFeature(sourceDb, feature.getName())) { parseCodingFeature(feature, sourceDb, dna, peptides, matcher); @@ -248,6 +246,8 @@ public class EmblEntry * parent dna sequence for this record * @param peptides * list of protein product sequences for Embl entry + * @param matcher + * helper to match xrefs in already retrieved sequences */ void parseCodingFeature(EmblFeature feature, String sourceDb, SequenceI dna, List peptides, SequenceIdMatcher matcher) @@ -428,6 +428,9 @@ public class EmblEntry boolean mappingUsed = false; for (DBRefEntry ref : feature.dbRefs) { + /* + * ensure UniProtKB/Swiss-Prot converted to UNIPROT + */ ref.setSource(DBRefUtils.getCanonicalName(ref.getSource())); if (ref.getSource().equals(DBRefSource.UNIPROT)) { diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 133aab4..54b1cb6 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4633,38 +4633,38 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Searches selected sequences for xRef products and builds the Show - * Cross-References menu (formerly called Show 
Products) + * Searches the alignment sequences for xRefs and builds the Show + * Cross-References menu (formerly called Show Products), with database + * sources for which cross-references are found (protein sources for a + * nucleotide alignment and vice versa) * - * @return true if Show Cross-references menu should be enabled. + * @return true if Show Cross-references menu should be enabled */ public boolean canShowProducts() { - SequenceI[] selection = viewport.getSequenceSelection(); + SequenceI[] seqs = viewport.getAlignment().getSequencesArray(); AlignmentI dataset = viewport.getAlignment().getDataset(); boolean showp = false; try { showProducts.removeAll(); final boolean dna = viewport.getAlignment().isNucleotide(); - String[] ptypes = (selection == null || selection.length == 0) ? null - : CrossRef.findSequenceXrefTypes(dna, selection, dataset); + List ptypes = (seqs == null || seqs.length == 0) ? null + : new CrossRef(seqs, dataset) + .findXrefSourcesForSequences(dna); - for (int t = 0; ptypes != null && t < ptypes.length; t++) + for (final String source : ptypes) { showp = true; final AlignFrame af = this; - final String source = ptypes[t]; - JMenuItem xtype = new JMenuItem(ptypes[t]); + JMenuItem xtype = new JMenuItem(source); xtype.addActionListener(new ActionListener() { - @Override public void actionPerformed(ActionEvent e) { showProductsFor(af.viewport.getSequenceSelection(), dna, source); } - }); showProducts.add(xtype); } @@ -4672,7 +4672,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, showProducts.setEnabled(showp); } catch (Exception e) { - jalview.bin.Cache.log + Cache.log .warn("canShowProducts threw an exception - please report to help@jalview.org", e); return false; @@ -4691,7 +4691,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * @param source * the database to show cross-references for */ - protected void showProductsFor(final SequenceI[] sel, final boolean dna, + protected 
void showProductsFor(final SequenceI[] sel, final boolean _odna, final String source) { Runnable foo = new Runnable() @@ -4708,156 +4708,161 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { AlignmentI alignment = AlignFrame.this.getViewport() .getAlignment(); - AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source, - alignment); - if (xrefs != null) + AlignmentI dataset = alignment.getDataset() == null ? alignment + : alignment.getDataset(); + boolean dna = alignment.isNucleotide(); + if (_odna!=dna) { - /* - * get display scheme (if any) to apply to features - */ - FeatureSettingsModelI featureColourScheme = new SequenceFetcher() - .getFeatureColourScheme(source); + System.err + .println("Conflict: showProducts for alignment originally " + + "thought to be " + + (_odna ? "DNA" : "Protein") + + " now searching for " + + (dna ? "DNA" : "Protein") + " Context."); + } + AlignmentI xrefs = new CrossRef(sel, dataset) + .findXrefSequences(source, dna); + if (xrefs == null) + { + return; + } + /* + * get display scheme (if any) to apply to features + */ + FeatureSettingsModelI featureColourScheme = new SequenceFetcher() + .getFeatureColourScheme(source); - AlignmentI al = makeCrossReferencesAlignment( - alignment.getDataset(), xrefs); + AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, + xrefs); - AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH, + AlignFrame newFrame = new AlignFrame(xrefsAlignment, DEFAULT_WIDTH, + DEFAULT_HEIGHT); + if (Cache.getDefault("HIDE_INTRONS", true)) + { + newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); + } + String newtitle = String.format("%s %s %s", MessageManager + .getString(dna ? 
"label.proteins" : "label.nucleotides"), + MessageManager.getString("label.for"), getTitle()); + newFrame.setTitle(newtitle); + + if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + { + /* + * split frame display is turned off in preferences file + */ + Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, DEFAULT_HEIGHT); - if (Cache.getDefault("HIDE_INTRONS", true)) - { - newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); - } - String newtitle = String.format("%s %s %s", - MessageManager.getString(dna ? "label.proteins" - : "label.nucleotides"), MessageManager - .getString("label.for"), getTitle()); - newFrame.setTitle(newtitle); + return; // via finally clause + } - if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + /* + * Make a copy of this alignment (sharing the same dataset + * sequences). If we are DNA, drop introns and update mappings + */ + AlignmentI copyAlignment = null; + final SequenceI[] sequenceSelection = AlignFrame.this.viewport + .getSequenceSelection(); + // List cf = xrefs.getCodonFrames(); + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment( + sequenceSelection, dataset); + if (copyAlignment.getHeight() == 0) { - /* - * split frame display is turned off in preferences file - */ - Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, - DEFAULT_HEIGHT); - return; // via finally clause + System.err.println("Failed to make CDS alignment"); } /* - * Make a copy of this alignment (sharing the same dataset - * sequences). 
If we are DNA, drop introns and update mappings + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl */ - AlignmentI copyAlignment = null; - final SequenceI[] sequenceSelection = AlignFrame.this.viewport - .getSequenceSelection(); - List cf = xrefs.getCodonFrames(); - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, cf, alignment); - if (copyAlignment.getHeight() == 0) - { - System.err.println("Failed to make CDS alignment"); - } - al.getCodonFrames().clear(); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) { - copyAlignment = AlignmentUtils.makeCopyAlignment( - sequenceSelection, xrefs.getSequencesArray()); - copyAlignment.addCodonFrames(cf); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; } - copyAlignment.setGapCharacter(AlignFrame.this.viewport - .getGapCharacter()); + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment( + sequenceSelection, xrefs.getSequencesArray()); + } + copyAlignment.setGapCharacter(AlignFrame.this.viewport + .getGapCharacter()); - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.registerMappings(cf); + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); - if (copyAlignment.getHeight() <= 0) 
- { - System.err.println("No Sequences generated for xRef type " - + source); - return; - } + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println("No Sequences generated for xRef type " + + source); + return; + } + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { /* - * align protein to dna + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! */ - if (dna && copyAlignmentIsAligned) + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) { - al.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(al); - } + copyAlignment.alignAs(xrefsAlignment); } + } - AlignFrame copyThis = new AlignFrame(copyAlignment, - AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); - copyThis.setTitle(AlignFrame.this.getTitle()); + AlignFrame copyThis = new AlignFrame(copyAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + copyThis.setTitle(AlignFrame.this.getTitle()); - boolean showSequenceFeatures = viewport - .isShowSequenceFeatures(); - newFrame.setShowSeqFeatures(showSequenceFeatures); - copyThis.setShowSeqFeatures(showSequenceFeatures); - FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer(); + boolean showSequenceFeatures = viewport.isShowSequenceFeatures(); + newFrame.setShowSeqFeatures(showSequenceFeatures); + copyThis.setShowSeqFeatures(showSequenceFeatures); + FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); - /* - * copy feature rendering settings to split frame - */ - 
newFrame.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); + /* + * copy feature rendering settings to split frame + */ + newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); + copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); - /* - * apply 'database source' feature configuration - * if any was found - */ - // TODO is this the feature colouring for the original - // alignment or the fetched xrefs? either could be Ensembl - newFrame.getViewport().applyFeaturesStyle(featureColourScheme); - copyThis.getViewport().applyFeaturesStyle(featureColourScheme); - - SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, - dna ? newFrame : copyThis); - newFrame.setVisible(true); - copyThis.setVisible(true); - String linkedTitle = MessageManager - .getString("label.linked_view_title"); - Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); - } - } catch (Exception e) - { - Cache.log.error("Exception when finding crossreferences", e); + /* + * apply 'database source' feature configuration + * if any was found + */ + // TODO is this the feature colouring for the original + // alignment or the fetched xrefs? either could be Ensembl + newFrame.getViewport().applyFeaturesStyle(featureColourScheme); + copyThis.getViewport().applyFeaturesStyle(featureColourScheme); + + SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, + dna ? 
newFrame : copyThis); + newFrame.setVisible(true); + copyThis.setVisible(true); + String linkedTitle = MessageManager + .getString("label.linked_view_title"); + Desktop.addInternalFrame(sf, linkedTitle, -1, -1); + sf.adjustDivider(); } catch (OutOfMemoryError e) { new OOMWarning("whilst fetching crossreferences", e); @@ -4873,11 +4878,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Makes an alignment containing the given sequences. If this is of the - * same type as the given dataset (nucleotide/protein), then the new - * alignment shares the same dataset, and its dataset sequences are added - * to it. Otherwise a new dataset sequence is created for the - * cross-references. + * Makes an alignment containing the given sequences, and adds them to the + * given dataset, which is also set as the dataset for the new alignment * * @param dataset * @param seqs @@ -4886,32 +4888,20 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, AlignmentI seqs) { - boolean sameType = dataset.isNucleotide() == seqs.isNucleotide(); - SequenceI[] sprods = new SequenceI[seqs.getHeight()]; for (int s = 0; s < sprods.length; s++) { sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); - if (sameType) + if (dataset.getSequences() == null + || !dataset.getSequences().contains( + sprods[s].getDatasetSequence())) { - if (dataset.getSequences() == null - || !dataset.getSequences().contains( - sprods[s].getDatasetSequence())) - { - dataset.addSequence(sprods[s].getDatasetSequence()); - } + dataset.addSequence(sprods[s].getDatasetSequence()); } sprods[s].updatePDBIds(); } Alignment al = new Alignment(sprods); - if (sameType) - { - al.setDataset((Alignment) dataset); - } - else - { - al.createDatasetAlignment(); - } + al.setDataset(dataset); return al; } diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index eeb4214..df076d9 
100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -2714,7 +2714,7 @@ public class Jalview2XML SequenceI[] orderedSeqs = tmpseqs .toArray(new SequenceI[tmpseqs.size()]); - Alignment al = new Alignment(orderedSeqs); + AlignmentI al = new Alignment(orderedSeqs); if (referenceseqForView != null) { @@ -4097,7 +4097,7 @@ public class Jalview2XML } AlignFrame loadViewport(String file, JSeq[] JSEQ, - List hiddenSeqs, Alignment al, + List hiddenSeqs, AlignmentI al, JalviewModelSequence jms, Viewport view, String uniqueSeqSetId, String viewId, List autoAlan) { @@ -4450,7 +4450,7 @@ public class Jalview2XML } private ColourSchemeI constructAnnotationColour( - AnnotationColours viewAnnColour, AlignFrame af, Alignment al, + AnnotationColours viewAnnColour, AlignFrame af, AlignmentI al, JalviewModelSequence jms, boolean checkGroupAnnColour) { boolean propagateAnnColour = false; @@ -4574,7 +4574,7 @@ public class Jalview2XML return cs; } - private void reorderAutoannotation(AlignFrame af, Alignment al, + private void reorderAutoannotation(AlignFrame af, AlignmentI al, List autoAlan) { // copy over visualization settings for autocalculated annotation in the @@ -4729,10 +4729,11 @@ public class Jalview2XML } } - private void recoverDatasetFor(SequenceSet vamsasSet, Alignment al, + private void recoverDatasetFor(SequenceSet vamsasSet, AlignmentI al, boolean ignoreUnrefed) { - jalview.datamodel.Alignment ds = getDatasetFor(vamsasSet.getDatasetId()); + jalview.datamodel.AlignmentI ds = getDatasetFor(vamsasSet + .getDatasetId()); Vector dseqs = null; if (ds == null) { @@ -4882,15 +4883,15 @@ public class Jalview2XML * TODO use AlignmentI here and in related methods - needs * AlignmentI.getDataset() changed to return AlignmentI instead of Alignment */ - Hashtable datasetIds = null; + Hashtable datasetIds = null; - IdentityHashMap dataset2Ids = null; + IdentityHashMap dataset2Ids = null; - private Alignment getDatasetFor(String datasetId) + private 
AlignmentI getDatasetFor(String datasetId) { if (datasetIds == null) { - datasetIds = new Hashtable(); + datasetIds = new Hashtable(); return null; } if (datasetIds.containsKey(datasetId)) @@ -4900,11 +4901,11 @@ public class Jalview2XML return null; } - private void addDatasetRef(String datasetId, Alignment dataset) + private void addDatasetRef(String datasetId, AlignmentI dataset) { if (datasetIds == null) { - datasetIds = new Hashtable(); + datasetIds = new Hashtable(); } datasetIds.put(datasetId, dataset); } @@ -4915,7 +4916,7 @@ public class Jalview2XML * @param dataset * @return */ - private String getDatasetIdRef(Alignment dataset) + private String getDatasetIdRef(AlignmentI dataset) { if (dataset.getDataset() != null) { @@ -4927,7 +4928,7 @@ public class Jalview2XML // make a new datasetId and record it if (dataset2Ids == null) { - dataset2Ids = new IdentityHashMap(); + dataset2Ids = new IdentityHashMap(); } else { diff --git a/src/jalview/gui/PCAPanel.java b/src/jalview/gui/PCAPanel.java index 47add28..2b09eb6 100644 --- a/src/jalview/gui/PCAPanel.java +++ b/src/jalview/gui/PCAPanel.java @@ -21,6 +21,7 @@ package jalview.gui; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SeqCigar; @@ -383,8 +384,8 @@ public class PCAPanel extends GPCAPanel implements Runnable, { // AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]); - Alignment al = new Alignment((SequenceI[]) alAndColsel[0]); - Alignment dataset = (av != null && av.getAlignment() != null) ? av + AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]); + AlignmentI dataset = (av != null && av.getAlignment() != null) ? 
av .getAlignment().getDataset() : null; if (dataset != null) { diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 03bb375..828a2aa 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -836,10 +836,8 @@ public class SequenceFetcher extends JPanel implements Runnable Cache.log.info( "Error retrieving " + accession + " from " + proxy.getDbName(), e); - } finally - { - return success; } + return success; } /** @@ -859,7 +857,6 @@ public class SequenceFetcher extends JPanel implements Runnable for (String q : queries) { - DBRefEntry[] found = null; DBRefEntry dbr = new DBRefEntry(); dbr.setSource(proxy.getDbSource()); dbr.setVersion(null); @@ -870,8 +867,9 @@ public class SequenceFetcher extends JPanel implements Runnable { if (rs[r] != null) { - found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId); - if (found != null && found.length > 0) + List found = DBRefUtils.searchRefs(rs[r].getDBRefs(), + accId); + if (!found.isEmpty()) { rfound = true; break; diff --git a/src/jalview/gui/TreePanel.java b/src/jalview/gui/TreePanel.java index d78350d..fafa610 100755 --- a/src/jalview/gui/TreePanel.java +++ b/src/jalview/gui/TreePanel.java @@ -520,8 +520,8 @@ public class TreePanel extends GTreePanel { // AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]); - Alignment al = new Alignment((SequenceI[]) alAndColsel[0]); - Alignment dataset = (av != null && av.getAlignment() != null) ? av + AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]); + AlignmentI dataset = (av != null && av.getAlignment() != null) ? 
av .getAlignment().getDataset() : null; if (dataset != null) { diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 5605a53..0beb45b 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -249,6 +249,18 @@ public class Comparison } /** + * Overloaded method signature to test whether a single sequence is nucleotide + * (that is, more than 85% CGTA) + * + * @param seq + * @return + */ + public static final boolean isNucleotide(SequenceI seq) + { + return isNucleotide(new SequenceI[] { seq }); + } + + /** * Answers true if more than 85% of the sequence residues (ignoring gaps) are * A, G, C, T or U, else false. This is just a heuristic guess and may give a * wrong answer (as AGCT are also amino acid codes). diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 424d40b..ed6d860 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -67,11 +67,14 @@ public class DBRefUtils } /** + * Returns those DBRefEntry objects whose source identifier (once converted to + * Jalview's canonical form) is in the list of sources to search for. Returns + * null if no matches found. * * @param dbrefs - * array of DBRef objects to search + * DBRefEntry objects to search * @param sources - * String[] array of source DBRef IDs to retrieve + * array of sources to select * @return */ public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs, @@ -148,8 +151,8 @@ public class DBRefUtils } /** - * Returns an array of those references that match the given entry, or null if - * no matches. Currently uses a comparator which matches if + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if *
    <ul>
    * <li>database sources are the same</li>
    * <li>accession ids are the same</li>
  • @@ -162,34 +165,35 @@ public class DBRefUtils * pattern to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry) + public static List searchRefs(DBRefEntry[] ref, + DBRefEntry entry) { return searchRefs(ref, entry, matchDbAndIdAndEitherMapOrEquivalentMapList); } /** - * Returns an array of those references that match the given accession id + * Returns a list of those references that match the given accession id *
      <ul>
      * <li>database sources are the same</li>
      * <li>accession ids are the same</li>
      * <li>both have no mapping, or the mappings are the same</li>
      * </ul>
    * - * @param ref + * @param refs * Set of references to search - * @param entry - * pattern to match + * @param accId + * accession id to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId) + public static List searchRefs(DBRefEntry[] refs, String accId) { - return searchRefs(ref, new DBRefEntry("", "", accId), matchId); + return searchRefs(refs, new DBRefEntry("", "", accId), matchId); } /** - * Returns an array of those references that match the given entry, according - * to the given comparator. Returns null if no matches. + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. * * @param refs * an array of database references to search @@ -198,14 +202,14 @@ public class DBRefUtils * @param comparator * @return */ - static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry, + static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, DbRefComp comparator) { + List rfs = new ArrayList(); if (refs == null || entry == null) { - return null; + return rfs; } - List rfs = new ArrayList(); for (int i = 0; i < refs.length; i++) { if (comparator.matches(entry, refs[i])) @@ -213,7 +217,7 @@ public class DBRefUtils rfs.add(refs[i]); } } - return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]); + return rfs; } interface DbRefComp @@ -380,9 +384,9 @@ public class DBRefUtils }; /** - * accession ID and DB must be identical. Version is ignored. No map on either - * or map but no maplist on either or maplist of map on a is equivalent to the - * maplist of map on b. + * accession ID and DB must be identical, or null on a. Version is ignored. No + * map on either or map but no maplist on either or maplist of map on a is + * equivalent to the maplist of map on b. 
*/ public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { @@ -393,8 +397,9 @@ public class DBRefUtils && refb.getSource().equals(refa.getSource())) { // We dont care about version - if (refa.getAccessionId() != null && refb.getAccessionId() != null - && refb.getAccessionId().equals(refa.getAccessionId())) + + if (refa.getAccessionId() == null + || refa.getAccessionId().equals(refb.getAccessionId())) { if (refa.getMap() == null || refb.getMap() == null) { @@ -406,7 +411,7 @@ public class DBRefUtils || (refb.getMap().getMap() != null && refa.getMap().getMap() != null && (refb .getMap().getMap().equals(refa.getMap().getMap())))) - { // getMap().getMap().containsEither(false,refa.getMap().getMap()) + { return true; } } @@ -519,4 +524,49 @@ public class DBRefUtils return (o1 == null ? o2.equals(o1) : o1.equals(o2)); } + /** + * Selects just the DNA or protein references from a set of references + * + * @param selectDna + * if true, select references to 'standard' DNA databases, else to + * 'standard' peptide databases + * @param refs + * a set of references to select from + * @return + */ + public static DBRefEntry[] selectDbRefs(boolean selectDna, + DBRefEntry[] refs) + { + return selectRefs(refs, selectDna ? 
DBRefSource.DNACODINGDBS + : DBRefSource.PROTEINDBS); + // could attempt to find other cross + // refs here - ie PDB xrefs + // (not dna, not protein seq) + } + + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source databse + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource(DBRefEntry[] dbRefs, + String source) + { + List matches = new ArrayList(); + if (dbRefs != null && source != null) + { + for (DBRefEntry dbref : dbRefs) + { + if (source.equals(dbref.getSource())) + { + matches.add(dbref); + } + } + } + return matches; + } + } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index e51442c..cae968e 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -88,8 +88,6 @@ public class MapList @Override public boolean equals(Object o) { - // TODO should also override hashCode to ensure equal objects have equal - // hashcodes if (o == null || !(o instanceof MapList)) { return false; @@ -112,6 +110,19 @@ public class MapList } /** + * Returns a hashcode made from the fromRatio, toRatio, and from/to ranges + */ + @Override + public int hashCode() + { + int hashCode = 31 * fromRatio; + hashCode = 31 * hashCode + toRatio; + hashCode = 31 * hashCode + fromShifts.toArray().hashCode(); + hashCode = 31 * hashCode + toShifts.toArray().hashCode(); + return hashCode; + } + + /** * Returns the 'from' ranges as {[start1, end1], [start2, end2], ...} * * @return @@ -215,7 +226,7 @@ public class MapList { /* * note lowest and highest values - bearing in mind the - * direction may be revesed + * direction may be reversed */ fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1])); fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1])); @@ -992,6 +1003,10 @@ public class MapList */ public void addMapList(MapList map) { + if (this.equals(map)) + { + return; + } this.fromLowest = Math.min(fromLowest, map.fromLowest); 
this.toLowest = Math.min(toLowest, map.toLowest); this.fromHighest = Math.max(fromHighest, map.fromHighest); @@ -1087,4 +1102,5 @@ public class MapList } return forwardStrand; } + } diff --git a/src/jalview/ws/AWSThread.java b/src/jalview/ws/AWSThread.java index b158448..2ef5256 100644 --- a/src/jalview/ws/AWSThread.java +++ b/src/jalview/ws/AWSThread.java @@ -31,9 +31,8 @@ import jalview.gui.WebserviceInfo; import jalview.util.MessageManager; import jalview.viewmodel.seqfeatures.FeatureRendererSettings; -import java.util.LinkedHashSet; +import java.util.ArrayList; import java.util.List; -import java.util.Set; public abstract class AWSThread extends Thread { @@ -61,7 +60,7 @@ public abstract class AWSThread extends Thread /** * dataset sequence relationships to be propagated onto new results */ - protected Set codonframe = null; + protected List codonframe = null; /** * are there jobs still running in this thread. @@ -384,7 +383,7 @@ public abstract class AWSThread extends Thread .getCodonFrames(); if (cf != null) { - codonframe = new LinkedHashSet(); + codonframe = new ArrayList(); codonframe.addAll(cf); } } diff --git a/src/jalview/ws/SequenceFetcherFactory.java b/src/jalview/ws/SequenceFetcherFactory.java new file mode 100644 index 0000000..2b8f364 --- /dev/null +++ b/src/jalview/ws/SequenceFetcherFactory.java @@ -0,0 +1,32 @@ +package jalview.ws; + +import jalview.ws.seqfetcher.ASequenceFetcher; + +public class SequenceFetcherFactory +{ + + private static SequenceFetcher instance; + + /** + * Returns a new SequenceFetcher object, or a mock object if one has been set + * + * @return + */ + public static ASequenceFetcher getSequenceFetcher() + { + return instance == null ? new SequenceFetcher() : instance; + } + + /** + * Set the instance object to use (intended for unit testing with mock + * objects). + * + * Be sure to reset to null in the tearDown method of any tests! 
+ * + * @param sf + */ + public static void setSequenceFetcher(SequenceFetcher sf) + { + instance = sf; + } +} diff --git a/src/jalview/ws/jws1/MsaWSClient.java b/src/jalview/ws/jws1/MsaWSClient.java index 95f5527..aad72b1 100644 --- a/src/jalview/ws/jws1/MsaWSClient.java +++ b/src/jalview/ws/jws1/MsaWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws1; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -34,7 +34,6 @@ import javax.swing.JMenu; import javax.swing.JMenuItem; import javax.swing.JOptionPane; -import ext.vamsas.MuscleWS; import ext.vamsas.MuscleWSServiceLocator; import ext.vamsas.MuscleWSSoapBindingStub; import ext.vamsas.ServiceHandle; @@ -72,7 +71,7 @@ public class MsaWSClient extends WS1Client public MsaWSClient(ext.vamsas.ServiceHandle sh, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { super(); @@ -109,7 +108,7 @@ public class MsaWSClient extends WS1Client } private void startMsaWSClient(String altitle, AlignmentView msa, - boolean submitGaps, boolean preserveOrder, Alignment seqdataset) + boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset) { if (!locateWebService()) { @@ -159,7 +158,7 @@ public class MsaWSClient extends WS1Client try { - this.server = (MuscleWS) loc.getMuscleWS(new java.net.URL(WsURL)); + this.server = loc.getMuscleWS(new java.net.URL(WsURL)); ((MuscleWSSoapBindingStub) this.server).setTimeout(60000); // One minute // timeout } catch (Exception ex) @@ -201,6 +200,7 @@ public class MsaWSClient extends WS1Client return (WebServiceName.indexOf("lustal") > -1); // cheat! 
} + @Override public void attachWSMenuEntry(JMenu msawsmenu, final ServiceHandle serviceHandle, final AlignFrame alignFrame) { @@ -209,6 +209,7 @@ public class MsaWSClient extends WS1Client method.setToolTipText(WsURL); method.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = alignFrame.gatherSequencesForAlignment(); @@ -228,6 +229,7 @@ public class MsaWSClient extends WS1Client methodR.setToolTipText(WsURL); methodR.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = alignFrame.gatherSequencesForAlignment(); diff --git a/src/jalview/ws/jws1/MsaWSThread.java b/src/jalview/ws/jws1/MsaWSThread.java index be21de7..3fd7c5a 100644 --- a/src/jalview/ws/jws1/MsaWSThread.java +++ b/src/jalview/ws/jws1/MsaWSThread.java @@ -23,6 +23,7 @@ package jalview.ws.jws1; import jalview.analysis.AlignSeq; import jalview.bin.Cache; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; @@ -147,6 +148,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI * * @return true if getAlignment will return a valid alignment result. */ + @Override public boolean hasResults() { if (subjobComplete && result != null && result.isFinished() @@ -273,6 +275,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI * * @return boolean true if job can be submitted. */ + @Override public boolean hasValidInput() { if (seqs.getSeqs() != null) @@ -285,7 +288,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -327,7 +330,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI MsaWSThread(ext.vamsas.MuscleWS server, String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, boolean subgaps, - boolean presorder, Alignment seqset) + boolean presorder, AlignmentI seqset) { this(server, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder); OutputHeader = wsInfo.getProgressText(); @@ -359,11 +362,13 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public boolean isCancellable() { return true; } + @Override public void cancelJob() { if (!jobComplete && jobs != null) @@ -430,11 +435,13 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public void pollJob(AWsJob job) throws Exception { ((MsaWSJob) job).result = server.getResult(((MsaWSJob) job).getJobId()); } + @Override public void StartJob(AWsJob job) { if (!(job instanceof MsaWSJob)) @@ -521,6 +528,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI return msa; } + @Override public void parseResult() { int results = 0; // number of result sets received @@ -571,6 +579,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI wsInfo.showResultsNewFrame .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(true); @@ -579,6 +588,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI wsInfo.mergeResults .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(false); @@ -661,7 +671,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI while (j < l) { if (((AlignmentOrder) alorders.get(i)) - .equals(((AlignmentOrder) alorders.get(j)))) + .equals((alorders.get(j)))) { alorders.remove(j); l--; @@ -704,6 +714,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public 
boolean canMergeResults() { return false; diff --git a/src/jalview/ws/jws1/SeqSearchWSClient.java b/src/jalview/ws/jws1/SeqSearchWSClient.java index d731ced..2d83bf9 100644 --- a/src/jalview/ws/jws1/SeqSearchWSClient.java +++ b/src/jalview/ws/jws1/SeqSearchWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws1; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -39,7 +39,6 @@ import javax.swing.JMenu; import javax.swing.JMenuItem; import javax.swing.JOptionPane; -import ext.vamsas.SeqSearchI; import ext.vamsas.SeqSearchServiceLocator; import ext.vamsas.SeqSearchServiceSoapBindingStub; import ext.vamsas.ServiceHandle; @@ -77,7 +76,7 @@ public class SeqSearchWSClient extends WS1Client public SeqSearchWSClient(ext.vamsas.ServiceHandle sh, String altitle, jalview.datamodel.AlignmentView msa, String db, - Alignment seqdataset, AlignFrame _alignFrame) + AlignmentI seqdataset, AlignFrame _alignFrame) { super(); alignFrame = _alignFrame; @@ -128,7 +127,7 @@ public class SeqSearchWSClient extends WS1Client } private void startSeqSearchClient(String altitle, AlignmentView msa, - String db, Alignment seqdataset) + String db, AlignmentI seqdataset) { if (!locateWebService()) { @@ -173,7 +172,7 @@ public class SeqSearchWSClient extends WS1Client try { - this.server = (SeqSearchI) loc.getSeqSearchService(new java.net.URL( + this.server = loc.getSeqSearchService(new java.net.URL( WsURL)); ((SeqSearchServiceSoapBindingStub) this.server).setTimeout(60000); // One // minute @@ -241,6 +240,7 @@ public class SeqSearchWSClient extends WS1Client return dbs; } + @Override public void attachWSMenuEntry(JMenu wsmenu, final ServiceHandle sh, final AlignFrame af) { @@ -281,6 +281,7 @@ public class SeqSearchWSClient extends WS1Client method.setToolTipText(sh.getEndpointURL()); method.addActionListener(new ActionListener() { + @Override public void 
actionPerformed(ActionEvent e) { // use same input gatherer as for secondary structure prediction @@ -305,6 +306,7 @@ public class SeqSearchWSClient extends WS1Client final String searchdb = dbs[db]; method.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = af.gatherSeqOrMsaForSecStrPrediction(); diff --git a/src/jalview/ws/jws1/SeqSearchWSThread.java b/src/jalview/ws/jws1/SeqSearchWSThread.java index 66fddd1..70056a6 100644 --- a/src/jalview/ws/jws1/SeqSearchWSThread.java +++ b/src/jalview/ws/jws1/SeqSearchWSThread.java @@ -24,6 +24,7 @@ import jalview.analysis.AlignSeq; import jalview.api.FeatureColourI; import jalview.bin.Cache; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; @@ -172,7 +173,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI * * @return null or { Alignment(+features and annotation), NewickFile)} */ - public Object[] getAlignment(Alignment dataset, + public Object[] getAlignment(AlignmentI dataset, Map featureColours) { @@ -303,7 +304,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -345,7 +346,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI SeqSearchWSThread(ext.vamsas.SeqSearchI server, String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, String db, - Alignment seqset) + AlignmentI seqset) { this(server, wsUrl, wsinfo, alFrame, _msa, wsname, db); OutputHeader = wsInfo.getProgressText(); diff --git a/src/jalview/ws/jws2/MsaWSClient.java b/src/jalview/ws/jws2/MsaWSClient.java index c83ef0f..758d941 100644 --- a/src/jalview/ws/jws2/MsaWSClient.java +++ b/src/jalview/ws/jws2/MsaWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws2; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -58,7 +58,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { this(sh, null, null, false, altitle, msa, submitGaps, preserveOrder, @@ -68,7 +68,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, WsParamSetI preset, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { this(sh, preset, null, false, altitle, msa, submitGaps, preserveOrder, @@ -95,7 +95,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, WsParamSetI preset, List arguments, boolean editParams, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { super(_alignFrame, preset, arguments); @@ -138,7 +138,7 @@ public class 
MsaWSClient extends Jws2Client } private void startMsaWSClient(String altitle, AlignmentView msa, - boolean submitGaps, boolean preserveOrder, Alignment seqdataset) + boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset) { // if (!locateWebService()) // { diff --git a/src/jalview/ws/jws2/MsaWSThread.java b/src/jalview/ws/jws2/MsaWSThread.java index e2f3a7c..bfae384 100644 --- a/src/jalview/ws/jws2/MsaWSThread.java +++ b/src/jalview/ws/jws2/MsaWSThread.java @@ -176,6 +176,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI * * @return true if getAlignment will return a valid alignment result. */ + @Override public boolean hasResults() { if (subjobComplete @@ -316,6 +317,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI * * @return boolean true if job can be submitted. */ + @Override public boolean hasValidInput() { // TODO: get attributes for this MsaWS instance to check if it can do two @@ -436,7 +438,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -479,7 +481,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, boolean subgaps, boolean presorder, - Alignment seqset) + AlignmentI seqset) { this(server2, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder); OutputHeader = wsInfo.getProgressText(); @@ -530,11 +532,13 @@ class MsaWSThread extends AWS2Thread implements WSClientI return validInput; } + @Override public boolean isCancellable() { return true; } + @Override public void cancelJob() { if (!jobComplete && jobs != null) @@ -605,6 +609,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public void pollJob(AWsJob job) throws Exception { // TODO: investigate if we still need to cast here in J1.6 @@ -650,6 +655,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI return changed; } + @Override public void StartJob(AWsJob job) { Exception lex = null; @@ -775,6 +781,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public void parseResult() { long progbar = System.currentTimeMillis(); @@ -889,6 +896,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI wsInfo.showResultsNewFrame .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(true); @@ -897,6 +905,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI wsInfo.mergeResults .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(false); @@ -1101,6 +1110,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public boolean canMergeResults() { return false; diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 2392476..33a917e 100644 --- 
a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -55,7 +55,7 @@ public class ASequenceFetcher /** * Constructor */ - public ASequenceFetcher() + protected ASequenceFetcher() { super(); @@ -125,20 +125,20 @@ public class ASequenceFetcher * if true, only fetch from nucleotide data sources, else peptide * @return */ - public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) + public SequenceI[] getSequences(List refs, boolean dna) { Vector rseqs = new Vector(); Hashtable> queries = new Hashtable>(); - for (int r = 0; r < refs.length; r++) + for (DBRefEntry ref : refs) { - if (!queries.containsKey(refs[r].getSource())) + if (!queries.containsKey(ref.getSource())) { - queries.put(refs[r].getSource(), new ArrayList()); + queries.put(ref.getSource(), new ArrayList()); } - List qset = queries.get(refs[r].getSource()); - if (!qset.contains(refs[r].getAccessionId())) + List qset = queries.get(ref.getSource()); + if (!qset.contains(ref.getAccessionId())) { - qset.add(refs[r].getAccessionId()); + qset.add(ref.getAccessionId()); } } Enumeration e = queries.keys(); @@ -205,15 +205,12 @@ public class ASequenceFetcher for (int is = 0; is < seqs.length; is++) { rseqs.addElement(seqs[is]); - DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is] + List frefs = DBRefUtils.searchRefs(seqs[is] .getDBRefs(), new DBRefEntry(db, null, null)); - if (frefs != null) + for (DBRefEntry dbr : frefs) { - for (DBRefEntry dbr : frefs) - { - queriesFound.add(dbr.getAccessionId()); - queriesMade.remove(dbr.getAccessionId()); - } + queriesFound.add(dbr.getAccessionId()); + queriesMade.remove(dbr.getAccessionId()); } seqs[is] = null; } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 2fc5325..3de2ce4 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -47,7 +47,6 @@ import jalview.util.MappingUtils; import 
java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -995,29 +994,26 @@ public class AlignmentUtilsTests AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); dna.setDataset(null); - List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); /* * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2 }, mappings, dna); + dna1, dna2 }, dna.getDataset()); assertEquals(2, cds.getSequences().size()); - assertEquals("GGGTTT", cds.getSequenceAt(0) - .getSequenceAsString()); - assertEquals("GGGTTTCCC", cds.getSequenceAt(1) - .getSequenceAsString()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); /* * verify shared, extended alignment dataset @@ -1029,62 +1025,76 @@ public class AlignmentUtilsTests .contains(cds.getSequenceAt(1).getDatasetSequence())); /* - * Verify mappings from CDS to peptide and cDNA to CDS + * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset */ - assertSame(dna.getCodonFrames(), cds.getCodonFrames()); - List cdsMappings = cds.getCodonFrames(); - assertEquals(2, cdsMappings.size()); - + List cdsMappings = cds.getDataset().getCodonFrames(); + /* + * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) + */ + assertEquals(6, cdsMappings.size()); + /* + * 
verify that mapping sets for dna and cds alignments are different + * [not current behaviour - all mappings are on the alignment dataset] + */ + // select -> subselect type to test. + // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames()); + // assertEquals(4, dna.getCodonFrames().size()); + // assertEquals(4, cds.getCodonFrames().size()); + + /* + * Two mappings involve pep1 (dna to pep1, cds to pep1) * Mapping from pep1 to GGGTTT in first new exon sequence */ - List pep1Mapping = MappingUtils + List pep1Mappings = MappingUtils .findMappingsForSequence(pep1, cdsMappings); - assertEquals(1, pep1Mapping.size()); + assertEquals(2, pep1Mappings.size()); + List mappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings); + assertEquals(1, mappings.size()); + // map G to GGG - SearchResults sr = MappingUtils - .buildSearchResults(pep1, 1, cdsMappings); + SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(0).getDatasetSequence(), - m.getSequence()); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 2, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(0).getDatasetSequence(), - m.getSequence()); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); /* - * Mapping from pep2 to GGGTTTCCC in second new exon sequence + * Two mappings involve pep2 (dna to pep2, cds to pep2) + * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence */ - List pep2Mapping = MappingUtils + List pep2Mappings = MappingUtils .findMappingsForSequence(pep2, cdsMappings); - assertEquals(1, pep2Mapping.size()); + assertEquals(2, 
pep2Mappings.size()); + mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1), + pep2Mappings); + assertEquals(1, mappings.size()); // map G to GGG - sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 1, mappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 2, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC - sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 3, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } @@ -1125,40 +1135,38 @@ public class AlignmentUtilsTests new DBRefEntry("EMBLCDS", "4", "A12347")); /* + * Create the CDS alignment + */ + AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); + dna.setDataset(null); + + /* * Make the mappings from dna to protein */ - List mappings = new ArrayList(); // map ...GGG...TTT to GF MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa...ccc to KP map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1); acf = new 
AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa......TTT to KF map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); - mappings.add(acf); - - /* - * Create the CDS alignment; also augments the dna-to-protein mappings with - * exon-to-protein and exon-to-dna mappings - */ - AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); - dna.setDataset(null); + dna.addCodonFrame(acf); /* * execute method under test */ AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( - new SequenceI[] { dna1 }, mappings, dna); + new SequenceI[] { dna1 }, dna.getDataset()); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively @@ -1214,41 +1222,74 @@ public class AlignmentUtilsTests * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ - Iterator newMappingsIterator = cdsal - .getCodonFrames().iterator(); + List newMappings = cdsal.getCodonFrames(); - // mappings for dna1 - exon1 - pep1 - AlignedCodonFrame cdsMapping = newMappingsIterator.next(); - List dnaMappings = cdsMapping.getMappingsFromSequence(dna1); - assertEquals(3, dnaMappings.size()); - assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) - .getTo()); - assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings - .get(0).getMap().getToPosition(1)); - List peptideMappings = cdsMapping.getMappingsFromSequence(cds - .get(0).getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); - - // mappings for dna1 - cds2 - pep2 - assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1) - .getTo()); - assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings - .get(1).getMap().getToPosition(4)); - peptideMappings = 
cdsMapping.getMappingsFromSequence(cds.get(1) - .getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); - - // mappings for dna1 - cds3 - pep3 - assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2) + /* + * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3) + */ + List dnaMappings = MappingUtils + .findMappingsForSequence(dna1, newMappings); + assertEquals(6, dnaMappings.size()); + + /* + * dna1 to pep1 + */ + List mappings = MappingUtils + .findMappingsForSequence(pep1, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, mappings.get(0).getMappings().size()); + assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds1 + */ + List dnaToCds1Mappings = MappingUtils + .findMappingsForSequence(cds.get(0), dnaMappings); + Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0) + .getMapping(); + assertSame(cds.get(0).getDatasetSequence(), mapping .getTo()); - assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings - .get(2).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2) - .getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); + assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping + .getMap().getToPosition(1)); + + /* + * dna1 to pep2 + */ + mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, mappings.get(0).getMappings().size()); + assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds2 + */ + List dnaToCds2Mappings = MappingUtils + .findMappingsForSequence(cds.get(1), dnaMappings); + mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping(); + assertSame(cds.get(1).getDatasetSequence(), 
mapping.getTo()); + assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping + .getMap().getToPosition(4)); + + /* + * dna1 to pep3 + */ + mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, mappings.get(0).getMappings().size()); + assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds3 + */ + List dnaToCds3Mappings = MappingUtils + .findMappingsForSequence(cds.get(2), dnaMappings); + mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping(); + assertSame(cds.get(2).getDatasetSequence(), mapping.getTo()); + assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping + .getMap().getToPosition(4)); } @Test(groups = { "Functional" }) @@ -1509,24 +1550,24 @@ public class AlignmentUtilsTests null)); dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f, null)); + + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); + dna.setDataset(null); - List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 12, 16, 18 }, new int[] { 1, 4 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); - AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); - dna.setDataset(null); AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2, dna3 }, mappings, dna); + dna1, dna2, dna3 }, dna.getDataset()); List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); @@ -1542,59 +1583,69 @@ public class AlignmentUtilsTests 
.contains(cdsSeqs.get(1).getDatasetSequence())); /* - * Verify updated mappings + * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1 + * and the same for dna2/cds2/pep2 */ - List cdsMappings = cds.getCodonFrames(); - assertEquals(2, cdsMappings.size()); + List mappings = cds.getCodonFrames(); + assertEquals(6, mappings.size()); /* - * Mapping from pep1 to GGGTTT in first new CDS sequence + * 2 mappings involve pep1 */ - List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, cdsMappings); - assertEquals(1, pep1Mapping.size()); + List pep1Mappings = MappingUtils + .findMappingsForSequence(pep1, mappings); + assertEquals(2, pep1Mappings.size()); + /* + * Get mapping of pep1 to cds1 and verify it * maps GPFG to 1-3,4-6,7-9,10-12 */ - SearchResults sr = MappingUtils - .buildSearchResults(pep1, 1, cdsMappings); + List pep1CdsMappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings); + assertEquals(1, pep1CdsMappings.size()); + SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, + pep1CdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(10, m.getStart()); assertEquals(12, m.getEnd()); /* - * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence + * Get 
mapping of pep2 to cds2 and verify it + * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence */ - List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, cdsMappings); - assertEquals(1, pep2Mapping.size()); - sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); + List pep2Mappings = MappingUtils + .findMappingsForSequence(pep2, mappings); + assertEquals(2, pep2Mappings.size()); + List pep2CdsMappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings); + assertEquals(1, pep2CdsMappings.size()); + sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index bbc23e5..0c3e4d5 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -21,10 +21,30 @@ package jalview.analysis; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNotSame; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; +import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.Alignment; +import 
jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Mapping; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.ws.SequenceFetcher; +import jalview.ws.SequenceFetcherFactory; +import java.util.ArrayList; +import java.util.List; + +import org.testng.annotations.AfterClass; import org.testng.annotations.Test; public class CrossRefTest @@ -40,27 +60,663 @@ public class CrossRefTest DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123"); DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123"); DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123"); + // ENSEMBL is a source of either dna or protein sequence data + DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123"); DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, - ref6, ref7, ref8 }; + ref6, ref7, ref8, ref9 }; /* * Just the DNA refs: */ - DBRefEntry[] found = CrossRef.findXDbRefs(false, refs); - assertEquals(3, found.length); + DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs); + assertEquals(4, found.length); assertSame(ref5, found[0]); assertSame(ref6, found[1]); assertSame(ref7, found[2]); + assertSame(ref9, found[3]); /* * Just the protein refs: */ - found = CrossRef.findXDbRefs(true, refs); - assertEquals(4, found.length); + found = DBRefUtils.selectDbRefs(false, refs); + assertEquals(5, found.length); assertSame(ref1, found[0]); assertSame(ref2, found[1]); assertSame(ref3, found[2]); assertSame(ref4, found[3]); + assertSame(ref9, found[4]); + } + + /** + * Test the method that finds a sequence's "product" xref source databases, + * which may be direct (dbrefs on the sequence), or indirect (dbrefs on + * sequences which share a dbref with the sequence + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSourcesForSequence_proteinToDna() + { + SequenceI seq = new 
Sequence("Seq1", "MGKYQARLSS"); + List sources = new ArrayList(); + AlignmentI al = new Alignment(new SequenceI[] {}); + + /* + * first with no dbrefs to search + */ + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); + assertTrue(sources.isEmpty()); + + /* + * add some dbrefs to sequence + */ + // protein db is not a candidate for findXrefSources + seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + // dna coding databatases are + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + // a second EMBL xref should not result in a duplicate + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); + seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); + assertEquals(4, sources.size()); + assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString()); + + /* + * add a sequence to the alignment which has a dbref to UNIPROT|A1234 + * and others to dna coding databases + */ + sources.clear(); + seq.setDBRefs(null); + seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS"); + seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? 
+ al.addSequence(seq2); + sources = new CrossRef(new SequenceI[] { seq, seq2 }, al) + .findXrefSourcesForSequences(false); + assertEquals(3, sources.size()); + assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString()); + } + + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the nucleotide sequence but on a peptide sequence in + * the alignment which which it shares a nucleotide dbref + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSequences_indirectDbrefToProtein() + { + /* + * Alignment setup: + * - nucleotide dbref EMBL|AF039662 + * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * Find UNIPROT xrefs for nucleotide + * - it has no UNIPROT dbref of its own + * - but peptide with matching nucleotide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(1, xrefs.getHeight()); + assertSame(uniprotSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the peptide sequence but on a nucleotide sequence in + * the alignment which which it shares a protein dbref + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSequences_indirectDbrefToNucleotide() + { + /* + * Alignment setup: + * - peptide dbref UNIPROT|Q9ZTS2 + * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new 
DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * find EMBL xrefs for peptide sequence - it has no direct + * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned + */ + /* + * Find EMBL xrefs for peptide + * - it has no EMBL dbref of its own + * - but nucleotide with matching peptide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al) + .findXrefSequences("EMBL", false); + assertEquals(1, xrefs.getHeight()); + assertSame(emblSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has no dbref to the desired source, and there are no indirect + * references via another sequence in the alignment + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_noDbrefs() + { + /* + * two nucleotide sequences, one with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); + + /* + * find UNIPROT xrefs for peptide sequence - it has no direct + * dbrefs, and the other sequence (which has a UNIPROT dbref) is not + * equatable to it, so no results found + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al) + .findXrefSequences("UNIPROT", true); + assertNull(xrefs); + } + + /** + * Tests for the method that searches an alignment (with one sequence + * excluded) for protein/nucleotide sequences with a given cross-reference + */ + @Test(groups = { "Functional" }, enabled = true) + public void testSearchDataset() + { + /* + * nucleotide sequence 
with UNIPROT AND EMBL dbref + * peptide sequence with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); + + List result = new ArrayList(); + + /* + * first search for a dbref nowhere on the alignment: + */ + DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419"); + CrossRef testee = new CrossRef(al.getSequencesArray(), al); + boolean found = testee.searchDataset(true, dna1, dbref, result, null, + true); + assertFalse(found); + assertTrue(result.isEmpty()); + + // TODO we are setting direct=true here but it is set to + // false in Jalview code... + + /* + * search for a protein sequence with dbref UNIPROT:Q9ZTS2 + */ + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result, + null, false); // search dataset with a protein xref from a dna + // sequence to locate the protein product + assertTrue(found); + assertEquals(1, result.size()); + assertSame(pep1, result.get(0)); + + /* + * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 + */ + result.clear(); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result, + null, false); // search dataset with a protein's direct dbref to + // locate dna sequences with matching xref + assertTrue(found); + assertEquals(1, result.size()); + assertSame(dna1, result.get(0)); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with a mapping to a sequence + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_fromDbRefMap() + { + /* + * two peptide sequences each with a DBRef and 
SequenceFeature + */ + SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); + pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111")); + pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, + "group")); + SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); + pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK")); + pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, + 12f, "group2")); + + /* + * nucleotide sequence (to go in the alignment) + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + + /* + * add DBRefEntry's to dna1 with mappings from dna to both peptides + */ + MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, + 3, 1); + Mapping map = new Mapping(pep1, mapList); + DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.addDBRef(dbRef1); + mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); + map = new Mapping(pep2, mapList); + DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); + dna1.addDBRef(dbRef2); + + /* + * find UNIPROT xrefs for nucleotide sequence - it should pick up + * mapped sequences + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(2, xrefs.getHeight()); + + /* + * cross-refs alignment holds copies of the mapped sequences + * including copies of their dbrefs and features + */ + checkCopySequence(pep1, xrefs.getSequenceAt(0)); + checkCopySequence(pep2, xrefs.getSequenceAt(1)); + } + + /** + * Helper method to assert seq1 looks like a copy of seq2 + * + * @param seq1 + * @param seq2 + */ + private void checkCopySequence(SequenceI seq1, SequenceI seq2) + { + assertNotSame(seq1, seq2); + assertEquals(seq1.getName(), seq2.getName()); + assertEquals(seq1.getStart(), seq2.getStart()); + assertEquals(seq1.getEnd(), seq2.getEnd()); + assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString()); 
+ + /* + * compare dbrefs + */ + assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs()); + // check one to verify a copy, not the same object + if (seq1.getDBRefs().length > 0) + { + assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]); + } + + /* + * compare features + */ + assertArrayEquals(seq1.getSequenceFeatures(), + seq2.getSequenceFeatures()); + if (seq1.getSequenceFeatures().length > 0) + { + assertNotSame(seq1.getSequenceFeatures()[0], + seq2.getSequenceFeatures()[0]); + } + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with no mapping, triggering a fetch from database + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_withFetch() + { + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); + final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW"); + final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); + + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for nucleotide sequence + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + @AfterClass + public void tearDown() + { + 
SequenceFetcherFactory.setSequenceFetcher(null); + } + + /** + * Test for finding 'product' sequences for the case where both gene and + * transcript sequences have dbrefs to Uniprot. + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_forGeneAndTranscripts() + { + /* + * 'gene' sequence + */ + SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC"); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + + /* + * 'transcript' with CDS feature (supports mapping to protein) + */ + SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa"); + braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f, + null)); + + /* + * 'spliced transcript' with CDS ranges + */ + SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa"); + braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f, + null)); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f, + null)); + + /* + * TODO code is fragile - use of SequenceIdMatcher depends on fetched + * sequences having a name starting Source|Accession + * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl + */ + final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL"); + final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF"); + + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for gene and 
transcripts + * verify that + * - the two proteins are retrieved but not duplicated + * - mappings are built from transcript (CDS) to proteins + * - no mappings from gene to proteins + */ + SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT", + true); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + /** + *
    +   * Test that emulates this (real but simplified) case:
    +   * Alignment:          DBrefs
    +   *     UNIPROT|P0CE19  EMBL|J03321, EMBL|X06707, EMBL|M19487
    +   *     UNIPROT|P0CE20  EMBL|J03321, EMBL|X06707, EMBL|X07547
    +   * Find cross-references for EMBL. These are mocked here as
    +   *     EMBL|J03321     with mappings to P0CE18, P0CE19, P0CE20 (P0CE18 omitted from the mock)
    +   *     EMBL|X06707     with mappings to P0CE17, P0CE19, P0CE20 (P0CE17 omitted from the mock)
    +   *     EMBL|M19487     with mappings to P0CE19, Q46432
    +   *     EMBL|X07547     with mappings to P0CE20, B0BCM4
    +   * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
    +   * The 3 EMBL sequences are added to the alignment dataset.
    +   * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
    +   * alignment dataset and updated to reference the original Uniprot sequences.
    +   * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
    +   * resolved from the dataset, and only the X07547 dbref fetched.
    +   * So the end state to verify is:
    +   * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
    +   * - P0CE19/20 dbrefs to EMBL sequences now have mappings
    +   * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
    +   * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
    +   * 
    + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_uniprotEmblManyToMany() + { + /* + * Uniprot sequences, both with xrefs to EMBL|J03321 + * and EMBL|X07547 + */ + SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG"); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487")); + SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK"); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547")); + + /* + * EMBL sequences to be 'fetched', complete with dbrefs and mappings + * to their protein products (CDS location and translations are provided + * in EMBL XML); these should be matched to, and replaced with, + * the corresponding uniprot sequences after fetching + */ + + /* + * J03321 with mappings to P0CE19 and P0CE20 + */ + final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA"); + DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, + 3, 1); + Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + mapList); + // add a dbref to the mapped to sequence - should get copied to p0ce19 + map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875")); + dbref1.setMap(map); + j03321.addDBRef(dbref1); + DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1); + dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), + new MapList(mapList))); + j03321.addDBRef(dbref2); + + /* + * X06707 with mappings to P0CE19 and P0CE20 + */ + final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG"); + DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 
1, 4 }, 3, + 1); + dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2)); + x06707.addDBRef(dbref3); + DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, + 1); + dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3)); + x06707.addDBRef(dbref4); + + /* + * M19487 with mapping to P0CE19 and Q46432 + */ + final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG"); + DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref5); + DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432"); + dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref6); + + /* + * X07547 with mapping to P0CE20 and B0BCM4 + */ + final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG"); + DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + new MapList(map2))); + x07547.addDBRef(dbref7); + DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4"); + dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"), + new MapList(map2))); + x07547.addDBRef(dbref8); + + /* + * mock sequence fetcher to 'return' the EMBL sequences + * TODO: Mockito would allow .thenReturn().thenReturn() here, + * and also capture and verification of the parameters + * passed in calls to getSequences() - important to verify that + * duplicate sequence fetches are not requested + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + int call = 0; + + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + call++; + if (call == 1) + { + assertEquals("Expected 3 embl seqs in first fetch", 3, + 
refs.size()); + return new SequenceI[] { j03321, x06707, m19487 }; + } + else + { + assertEquals("Expected 1 embl seq in second fetch", 1, + refs.size()); + return new SequenceI[] { x07547 }; + } + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find EMBL xrefs for Uniprot seqs and verify that + * - the EMBL xref'd sequences are retrieved without duplicates + * - mappings are added to the Uniprot dbrefs + * - mappings in the EMBL-to-Uniprot dbrefs are updated to the + * alignment sequences + * - dbrefs on the EMBL sequences are added to the original dbrefs + */ + SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL", + false); + + /* + * verify retrieved sequences + */ + assertNotNull(xrefs); + assertEquals(4, xrefs.getHeight()); + assertSame(j03321, xrefs.getSequenceAt(0)); + assertSame(x06707, xrefs.getSequenceAt(1)); + assertSame(m19487, xrefs.getSequenceAt(2)); + assertSame(x07547, xrefs.getSequenceAt(3)); + + /* + * verify mappings added to Uniprot-to-EMBL dbrefs + */ + Mapping mapping = p0ce19.getDBRefs()[0].getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce19.getDBRefs()[1].getMap(); + assertSame(x06707, mapping.getTo()); + mapping = p0ce20.getDBRefs()[0].getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce20.getDBRefs()[1].getMap(); + assertSame(x06707, mapping.getTo()); + + /* + * verify dbrefs on EMBL are mapped to alignment seqs + */ + assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo()); + assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo()); + assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo()); + assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo()); + + /* + * verify new dbref on EMBL dbref mapping is copied to the + * original Uniprot sequence + */ + assertEquals(4, p0ce19.getDBRefs().length); + assertEquals("PIR", p0ce19.getDBRefs()[3].getSource()); + 
assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId()); } + @Test(groups = "Functional") + public void testSameSequence() + { + assertTrue(CrossRef.sameSequence(null, null)); + SequenceI seq1 = new Sequence("seq1", "ABCDEF"); + assertFalse(CrossRef.sameSequence(seq1, null)); + assertFalse(CrossRef.sameSequence(null, seq1)); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF"))); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef"))); + assertFalse(CrossRef + .sameSequence(seq1, new Sequence("seq2", "ABCDE-F"))); + assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF"))); + } } diff --git a/test/jalview/analysis/SequenceIdMatcherTest.java b/test/jalview/analysis/SequenceIdMatcherTest.java index 9d3e3b6..a17270d 100644 --- a/test/jalview/analysis/SequenceIdMatcherTest.java +++ b/test/jalview/analysis/SequenceIdMatcherTest.java @@ -90,5 +90,11 @@ public class SequenceIdMatcherTest * case insensitive matching */ assertTrue(testee.equals("a12345")); + + testee = sequenceIdMatcher.new SeqIdName("UNIPROT|A12345"); + assertFalse(testee.equals("A12345")); + assertFalse(testee.equals("UNIPROT|B98765")); + assertFalse(testee.equals("UNIPROT|")); + assertTrue(testee.equals("UNIPROT")); } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index cd8a1e3..f2dd968 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -451,4 +451,30 @@ public class AlignedCodonFrameTest assertArrayEquals(new int[] { 2, 2 }, acf.getMappedRegion(seq2, seq1, 6)); } + + /** + * Tests for addMap. 
See also tests for MapList.addMapList + */ + @Test(groups = { "Functional" }) + public void testAddMap() + { + final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); + seq1.createDatasetSequence(); + final Sequence aseq1 = new Sequence("Seq1", "-V-L"); + aseq1.createDatasetSequence(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] { + 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + assertEquals(1, acf.getMappingsFromSequence(seq1).size()); + Mapping before = acf.getMappingsFromSequence(seq1).get(0); + + /* + * add the same map again, verify it doesn't get duplicated + */ + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + assertEquals(1, acf.getMappingsFromSequence(seq1).size()); + assertSame(before, acf.getMappingsFromSequence(seq1).get(0)); + } } diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 5a45176..08a9441 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -398,6 +398,8 @@ public class AlignmentTest // TODO should the copy constructor copy the dataset? // or make a new one referring to the same dataset sequences?? assertNull(copy.getDataset()); + // TODO test metadata is copied when AlignmentI is a dataset + // assertArrayEquals(copy.getDataset().getSequencesArray(), protein // .getDataset().getSequencesArray()); } @@ -436,8 +438,7 @@ public class AlignmentTest // TODO promote this method to AlignmentI ((Alignment) protein).createDatasetAlignment(); - // TODO this method should return AlignmentI not Alignment !! 
- Alignment ds = protein.getDataset(); + AlignmentI ds = protein.getDataset(); // side-effect: dataset created on second sequence assertNotNull(protein.getSequenceAt(1).getDatasetSequence()); diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 17dfcdc..5c5c5c3 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -65,6 +65,20 @@ public class SequenceTest assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]); } + @Test(groups = ("Functional")) + public void testIsProtein() + { + // test Protein + assertTrue(new Sequence("prot","ASDFASDFASDF").isProtein()); + // test DNA + assertFalse(new Sequence("prot","ACGTACGTACGT").isProtein()); + // test RNA + SequenceI sq = new Sequence("prot","ACGUACGUACGU"); + assertFalse(sq.isProtein()); + // change sequence, should trigger an update of cached result + sq.setSequence("ASDFASDFADSF"); + assertTrue(sq.isProtein()); + } @Test(groups = { "Functional" }) public void testGetAnnotation() { @@ -388,6 +402,20 @@ public class SequenceTest } /** + * test createDatasetSequence behaves to doc + */ + @Test(groups = { "Functional" }) + public void testCreateDatasetSequence() + { + SequenceI sq = new Sequence("my","ASDASD"); + assertNull(sq.getDatasetSequence()); + SequenceI rds = sq.createDatasetSequence(); + assertNotNull(rds); + assertNull(rds.getDatasetSequence()); + assertEquals(sq.getDatasetSequence(), rds); + } + + /** * Test for deriveSequence applied to a sequence with a dataset */ @Test(groups = { "Functional" }) diff --git a/test/jalview/gui/AlignViewportTest.java b/test/jalview/gui/AlignViewportTest.java index b39b2bd..dc9113c 100644 --- a/test/jalview/gui/AlignViewportTest.java +++ b/test/jalview/gui/AlignViewportTest.java @@ -36,6 +36,7 @@ import jalview.datamodel.SequenceI; import jalview.io.FileLoader; import jalview.io.FormatAdapter; import jalview.structure.StructureSelectionManager; +import jalview.util.MapList; 
import java.util.ArrayList; import java.util.List; @@ -133,8 +134,13 @@ public class AlignViewportTest AlignFrame af1 = new FileLoader().LoadFileWaitTillLoaded( ">Seq1\nCAGT\n", FormatAdapter.PASTE); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 1, 4 }, + 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 4, 1 }, + 1, 1)); List mappings = new ArrayList(); mappings.add(acf1); @@ -178,10 +184,20 @@ public class AlignViewportTest ">Seq1\nRSVQ\n", FormatAdapter.PASTE); AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded( ">Seq2\nDGEL\n", FormatAdapter.PASTE); - + SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA"); + SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA"); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); + SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0); + // need to be distinct AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1, + 12 }, 1, 1)); List mappings1 = new ArrayList(); mappings1.add(acf1); @@ -231,10 +247,20 @@ public class AlignViewportTest ">Seq1\nRSVQ\n", FormatAdapter.PASTE); AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded( ">Seq2\nDGEL\n", FormatAdapter.PASTE); - + SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA"); + SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA"); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); + SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0); + // need to be distinct 
AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1, + 12 }, 1, 1)); List mappings1 = new ArrayList(); mappings1.add(acf1); diff --git a/test/jalview/schemes/UserColourSchemeTest.java b/test/jalview/schemes/UserColourSchemeTest.java index 88f4331..e524cb4 100644 --- a/test/jalview/schemes/UserColourSchemeTest.java +++ b/test/jalview/schemes/UserColourSchemeTest.java @@ -10,7 +10,7 @@ import org.testng.annotations.Test; public class UserColourSchemeTest { - @Test(groups = "functional") + @Test(groups = "Functional") public void testGetColourFromString() { /* diff --git a/test/jalview/structure/StructureSelectionManagerTest.java b/test/jalview/structure/StructureSelectionManagerTest.java index 999d158..d07f919 100644 --- a/test/jalview/structure/StructureSelectionManagerTest.java +++ b/test/jalview/structure/StructureSelectionManagerTest.java @@ -29,6 +29,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.FormatAdapter; import jalview.io.StructureFile; +import jalview.util.MapList; import java.util.ArrayList; import java.util.List; @@ -51,7 +52,11 @@ public class StructureSelectionManagerTest public void testRegisterMapping() { AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); ssm.registerMapping(acf1); assertEquals(1, ssm.getSequenceMappings().size()); @@ -75,8 +80,14 @@ public 
class StructureSelectionManagerTest public void testRegisterMappings() { AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(new Sequence("s3", "ttt"), new Sequence("p3", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); List set1 = new ArrayList(); set1.add(acf1); diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java index c5e8ef5..96935ce 100644 --- a/test/jalview/util/DBRefUtilsTest.java +++ b/test/jalview/util/DBRefUtilsTest.java @@ -33,6 +33,8 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import java.util.List; + import org.testng.annotations.Test; public class DBRefUtilsTest @@ -191,12 +193,13 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3, ref4, ref5 }, target); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + assertEquals(3, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); } /** @@ -224,11 +227,12 @@ public class DBRefUtilsTest new int[] { 1, 1 }, 2, 2)); ref3.setMap(map3); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3 }, target); - assertEquals(2, matches.length); - assertSame(ref1, matches[0]); - 
assertSame(ref2, matches[1]); + assertEquals(2, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); } /** @@ -249,11 +253,42 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, - ref2, ref3, ref4, ref5 }, "A1234"); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + DBRefEntry[] dbrefs = new DBRefEntry[] { ref1, + ref2, ref3, ref4, ref5 }; + List matches = DBRefUtils.searchRefs(dbrefs, "A1234"); + assertEquals(3, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); + } + + /** + * Test the method that searches for matches references - case when we are + * matching a reference with null (any) accession id + */ + @Test(groups = { "Functional" }) + public void testSearchRefs_wildcardAccessionid() + { + DBRefEntry target = new DBRefEntry("EMBL", "2", null); + + DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches + // constructor changes embl to EMBL + DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches + // constructor does not upper-case accession id + DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches + DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match + // ref5 matches although it has a mapping - ignored + DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237"); + ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, + 1 }, 1, 1))); + + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, + ref2, ref3, ref4, ref5 }, target); + assertEquals(4, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref3, matches.get(2)); + assertSame(ref5, matches.get(3)); } } diff --git a/test/jalview/util/MapListTest.java 
b/test/jalview/util/MapListTest.java index d4ed0ea..ba298c5 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -563,6 +563,21 @@ public class MapListTest s); } + /** + * Test that confirms adding a map twice does nothing + */ + @Test(groups = { "Functional" }) + public void testAddMapList_sameMap() + { + MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 }, + new int[] { 72, 22 }, 1, 3); + String before = ml.toString(); + ml.addMapList(ml); + assertEquals(before, ml.toString()); + ml.addMapList(new MapList(ml)); + assertEquals(before, ml.toString()); + } + @Test(groups = { "Functional" }) public void testAddMapList_contiguous() { diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java index a54ce8b..94bf979 100644 --- a/test/jalview/ws/SequenceFetcherTest.java +++ b/test/jalview/ws/SequenceFetcherTest.java @@ -1,5 +1,6 @@ package jalview.ws; +import jalview.analysis.CrossRef; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; @@ -24,8 +25,6 @@ public class SequenceFetcherTest // TODO: extracted from SequenceFetcher - convert to proper unit test with // assertions - AlignmentI ds = null; - Vector noProds = new Vector(); String usage = "SequenceFetcher.main [-nodas] [ []]\n" + "With no arguments, all DbSources will be queried with their test Accession number.\n" + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n" @@ -44,7 +43,7 @@ public class SequenceFetcherTest { List sps = new SequenceFetcher(withDas) .getSourceProxy(argv[0]); - + if (sps != null) { for (DbSourceProxy sp : sps) @@ -52,7 +51,8 @@ public class SequenceFetcherTest AlignmentI al = null; try { - al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp + testRetrieval(argv[0], sp, + argv.length > 1 ? 
argv[1] : sp .getTestQuery()); } catch (Exception e) { @@ -61,16 +61,6 @@ public class SequenceFetcherTest + (argv.length > 1 ? argv[1] : sp.getTestQuery()) + " from " + argv[0] + "\nUsage: " + usage); } - SequenceI[] prod = al.getSequencesArray(); - if (al != null) - { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true) + " : " - + prod[p].getDescription()); - } - } } return; } @@ -95,139 +85,135 @@ public class SequenceFetcherTest } for (DbSourceProxy sp : sfetcher.getSourceProxy(db)) { - System.out.println("Source: " + sp.getDbName() + " (" + db - + "): retrieving test:" + sp.getTestQuery()); - AlignmentI al = null; - try + testRetrieval(db, sp, sp.getTestQuery()); + } + } + + } + + private static void testRetrieval(String db, DbSourceProxy sp, + String testQuery) + { + AlignmentI ds = null; + Vector noProds = new Vector(); + System.out.println("Source: " + sp.getDbName() + " (" + db + + "): retrieving test:" + sp.getTestQuery()); + { + AlignmentI al = null; + try + { + al = sp.getSequenceRecords(testQuery); + if (al != null && al.getHeight() > 0) { - al = sp.getSequenceRecords(sp.getTestQuery()); - if (al != null && al.getHeight() > 0) + boolean dna = sp.isDnaCoding(); + al.setDataset(null); + AlignmentI alds = al.getDataset(); + // try and find products + CrossRef crossRef = new CrossRef(al.getSequencesArray(), alds); + List types = crossRef.findXrefSourcesForSequences(dna); + if (types != null) { - boolean dna = sp.isDnaCoding(); - // try and find products - String types[] = jalview.analysis.CrossRef - .findSequenceXrefTypes(dna, al.getSequencesArray()); - if (types != null) + System.out.println("Xref Types for: " + (dna ? "dna" : "prot")); + for (String source : types) { - System.out.println("Xref Types for: " - + (dna ? 
"dna" : "prot")); - for (int t = 0; t < types.length; t++) + System.out.println("Type: " + source); + SequenceI[] prod = crossRef.findXrefSequences(source, dna) + .getSequencesArray(); + System.out.println("Found " + + ((prod == null) ? "no" : "" + prod.length) + + " products"); + if (prod != null) { - System.out.println("Type: " + types[t]); - SequenceI[] prod = jalview.analysis.CrossRef - .findXrefSequences(al.getSequencesArray(), dna, - types[t], null) - .getSequencesArray(); - System.out.println("Found " - + ((prod == null) ? "no" : "" + prod.length) - + " products"); - if (prod != null) + for (int p = 0; p < prod.length; p++) { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); } } } - else - { - noProds.addElement((dna ? new Object[] { al, al } - : new Object[] { al })); - } - - } - } catch (Exception ex) - { - System.out.println("ERROR:Failed to retrieve test query."); - ex.printStackTrace(System.out); - } - - if (al == null) - { - System.out.println("ERROR:No alignment retrieved."); - StringBuffer raw = sp.getRawRecords(); - if (raw != null) - { - System.out.println(raw.toString()); } else { - System.out.println("ERROR:No Raw results."); + noProds.addElement((dna ? 
new Object[] { al, al } + : new Object[] { al })); } + + } + } catch (Exception ex) + { + System.out.println("ERROR:Failed to retrieve test query."); + ex.printStackTrace(System.out); + } + + if (al == null) + { + System.out.println("ERROR:No alignment retrieved."); + StringBuffer raw = sp.getRawRecords(); + if (raw != null) + { + System.out.println(raw.toString()); } else { - System.out.println("Retrieved " + al.getHeight() + " sequences."); - for (int s = 0; s < al.getHeight(); s++) - { - SequenceI sq = al.getSequenceAt(s); - while (sq.getDatasetSequence() != null) - { - sq = sq.getDatasetSequence(); - - } - if (ds == null) - { - ds = new Alignment(new SequenceI[] { sq }); - - } - else - { - ds.addSequence(sq); - } - } + System.out.println("ERROR:No Raw results."); + } + } + else + { + System.out.println("Retrieved " + al.getHeight() + " sequences."); + if (ds == null) + { + ds = al.getDataset(); + } + else + { + ds.append(al.getDataset()); + al.setDataset(ds); } - System.out.flush(); - System.err.flush(); - } - if (noProds.size() > 0) + System.out.flush(); + System.err.flush(); + } + if (noProds.size() > 0) + { + Enumeration ts = noProds.elements(); + while (ts.hasMoreElements()) + { - Enumeration ts = noProds.elements(); - while (ts.hasMoreElements()) - + Object[] typeSq = ts.nextElement(); + boolean dna = (typeSq.length > 1); + AlignmentI al = (AlignmentI) typeSq[0]; + System.out.println("Trying getProducts for " + + al.getSequenceAt(0).getDisplayId(true)); + System.out.println("Search DS Xref for: " + (dna ? "dna" : "prot")); + // have a bash at finding the products amongst all the retrieved + // sequences. + SequenceI[] seqs = al.getSequencesArray(); + Alignment prodal = new CrossRef(seqs, ds).findXrefSequences(null, + dna); + System.out.println("Found " + + ((prodal == null) ? 
"no" : "" + prodal.getHeight()) + + " products"); + if (prodal != null) { - Object[] typeSq = ts.nextElement(); - boolean dna = (typeSq.length > 1); - AlignmentI al = (AlignmentI) typeSq[0]; - System.out.println("Trying getProducts for " - + al.getSequenceAt(0).getDisplayId(true)); - System.out.println("Search DS Xref for: " - + (dna ? "dna" : "prot")); - // have a bash at finding the products amongst all the retrieved - // sequences. - SequenceI[] seqs = al.getSequencesArray(); - Alignment prodal = jalview.analysis.CrossRef.findXrefSequences( - seqs, dna, null, ds); - System.out.println("Found " - + ((prodal == null) ? "no" : "" + prodal.getHeight()) - + " products"); - if (prodal != null) + SequenceI[] prod = prodal.getSequencesArray(); // note + // should + // test + // rather + // than + // throw + // away + // codon + // mapping + // (if + // present) + for (int p = 0; p < prod.length; p++) { - SequenceI[] prod = prodal.getSequencesArray(); // note - // should - // test - // rather - // than - // throw - // away - // codon - // mapping - // (if - // present) - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); } } - } - } } - } diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index 341d9ef..b3c7e10 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -178,8 +178,8 @@ public class DbRefFetcherTest .getMap().getMappedWidth(), 1); assertEquals("Expected local reference map to be 3 nucleotides", dr[0] .getMap().getWidth(), 3); - AlignmentI sprods = CrossRef.findXrefSequences( - alsq.getSequencesArray(), true, dr[0].getSource(), alsq); + AlignmentI sprods = new CrossRef(alsq.getSequencesArray(), alsq) + .findXrefSequences(dr[0].getSource(), true); assertNotNull( "Couldn't recover cross reference 
sequence from dataset. Was it ever added ?", sprods);