From 65675b2d016736f8ca4dfb2493863c4d53cec591 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 3 Jun 2016 09:00:04 +0100 Subject: [PATCH] JAL-2110 random stuff --- src/jalview/analysis/CrossRef.java | 45 +++++++++--------- src/jalview/analysis/CrossRefs.java | 19 ++++---- src/jalview/gui/AlignFrame.java | 3 +- src/jalview/gui/SequenceFetcher.java | 10 ++-- src/jalview/util/Comparison.java | 12 +++++ src/jalview/util/DBRefUtils.java | 56 +++++++++++++++++------ src/jalview/ws/seqfetcher/ASequenceFetcher.java | 25 +++++----- test/jalview/analysis/CrossRefTest.java | 2 +- test/jalview/analysis/CrossRefsTest.java | 4 -- test/jalview/util/DBRefUtilsTest.java | 45 ++++++++++-------- 10 files changed, 130 insertions(+), 91 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 8fd0706..cb664df 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -35,6 +35,7 @@ import jalview.ws.SequenceFetcherFactory; import jalview.ws.seqfetcher.ASequenceFetcher; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -297,7 +298,7 @@ public class CrossRef xrfs = t; try { - retrieved = sftch.getSequences(xrfs, !dna); + retrieved = sftch.getSequences(Arrays.asList(xrfs), !dna); // problem here is we don't know which of xrfs resulted in which // retrieved element } catch (Exception e) @@ -602,7 +603,7 @@ public class CrossRef // look for direct or indirect references in common DBRefEntry[] poss = nxt.getDBRefs(); - DBRefEntry[] cands = null; + List cands = null; /* * TODO does this make any sense? 
* if 'direct', search the dbrefs for xrf @@ -618,34 +619,36 @@ public class CrossRef poss = DBRefUtils.selectDbRefs(!dna, poss); cands = DBRefUtils.searchRefs(poss, xrf); } - if (cands != null) + if (!cands.isEmpty()) { if (!rseqs.contains(nxt)) { found = true; rseqs.add(nxt); - boolean foundmap = cf != null; - // don't search if we aren't given a codon map object - for (int r = 0; foundmap && r < cands.length; r++) + if (cf != null) { - if (cands[r].hasMap()) + // don't search if we aren't given a codon map object + for (DBRefEntry candidate : cands) { - Mapping mapping = cands[r].getMap(); - MapList map = mapping.getMap(); - if (mapping.getTo() != null - && map.getFromRatio() != map.getToRatio()) + Mapping mapping = candidate.getMap(); + if (mapping != null) { - // get sense of map correct for adding to product - // alignment. - if (dna) + MapList map = mapping.getMap(); + if (mapping.getTo() != null + && map.getFromRatio() != map.getToRatio()) { - // map is from dna seq to a protein product - cf.addMap(sequenceI, nxt, map); - } - else - { - // map should be from protein seq to its coding dna - cf.addMap(nxt, sequenceI, map.getInverse()); + // get sense of map correct for adding to product + // alignment. 
+ if (dna) + { + // map is from dna seq to a protein product + cf.addMap(sequenceI, nxt, map); + } + else + { + // map should be from protein seq to its coding dna + cf.addMap(nxt, sequenceI, map.getInverse()); + } } } } diff --git a/src/jalview/analysis/CrossRefs.java b/src/jalview/analysis/CrossRefs.java index 7d0c263..0f3f425 100644 --- a/src/jalview/analysis/CrossRefs.java +++ b/src/jalview/analysis/CrossRefs.java @@ -49,6 +49,8 @@ public class CrossRefs List foundSeqs = new ArrayList(); AlignedCodonFrame mappings = new AlignedCodonFrame(); + List sourceRefs = new ArrayList(); + for (SequenceI seq : seqs) { if (dna != Comparison.isNucleotide(seq)) @@ -62,28 +64,27 @@ public class CrossRefs /* * get this sequence's dbrefs to source database (if any) */ - List sourceRefs = DBRefUtils.searchRefsForSource( + List seqSourceRefs = DBRefUtils.searchRefsForSource( seq.getDBRefs(), source); /* * first extract any mapped sequences from sourceRefs */ - findMappedDbrefs(seq, sourceRefs, foundSeqs, mappings); + findMappedDbrefs(seq, seqSourceRefs, foundSeqs, mappings); /* * for remaining sourceRefs, try to match a * complementary sequence in the dataset */ - findIndirectCrossReferences(seq, source, sourceRefs, dataset, + findIndirectCrossReferences(seq, source, seqSourceRefs, dataset, foundSeqs, mappings); - - /* - * fetch any remaining sourceRefs from the source database - */ - fetchCrossReferences(seq, sourceRefs, foundSeqs, mappings, dna, - dataset); } + /* + * fetch any remaining sourceRefs from the source database + */ + fetchCrossReferences(sourceRefs, foundSeqs, mappings, dna, dataset); + if (foundSeqs.isEmpty()) { return null; diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index ea3fdf2..477d113 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -23,6 +23,7 @@ package jalview.gui; import jalview.analysis.AlignmentSorter; import jalview.analysis.AlignmentUtils; import jalview.analysis.CrossRef; 
+import jalview.analysis.CrossRefs; import jalview.analysis.Dna; import jalview.analysis.ParseProperties; import jalview.analysis.SequenceIdMatcher; @@ -4705,7 +4706,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { AlignmentI alignment = AlignFrame.this.getViewport() .getAlignment(); - AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source, + AlignmentI xrefs = CrossRefs.findXrefSequences(sel, dna, source, alignment); if (xrefs != null) { diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 71c8a39..85ea20b 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -817,10 +817,8 @@ public class SequenceFetcher extends JPanel implements Runnable Cache.log.info( "Error retrieving " + accession + " from " + proxy.getDbName(), e); - } finally - { - return success; } + return success; } /** @@ -840,7 +838,6 @@ public class SequenceFetcher extends JPanel implements Runnable for (String q : queries) { - DBRefEntry[] found = null; DBRefEntry dbr = new DBRefEntry(); dbr.setSource(proxy.getDbSource()); dbr.setVersion(null); @@ -851,8 +848,9 @@ public class SequenceFetcher extends JPanel implements Runnable { if (rs[r] != null) { - found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId); - if (found != null && found.length > 0) + List found = DBRefUtils.searchRefs(rs[r].getDBRefs(), + accId); + if (!found.isEmpty()) { rfound = true; break; diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 5605a53..0beb45b 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -249,6 +249,18 @@ public class Comparison } /** + * Overloaded method signature to test whether a single sequence is nucleotide + * (that is, more than 85% CGTA) + * + * @param seq + * @return + */ + public static final boolean isNucleotide(SequenceI seq) + { + return isNucleotide(new SequenceI[] { seq }); + } + + /** * Answers true 
if more than 85% of the sequence residues (ignoring gaps) are * A, G, C, T or U, else false. This is just a heuristic guess and may give a * wrong answer (as AGCT are also amino acid codes). diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index db52647..ed6d860 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -151,8 +151,8 @@ public class DBRefUtils } /** - * Returns an array of those references that match the given entry, or null if - * no matches. Currently uses a comparator which matches if + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if * <ul> * <li>database sources are the same</li> * <li>accession ids are the same</li> @@ -165,34 +165,35 @@ public class DBRefUtils * pattern to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry) + public static List searchRefs(DBRefEntry[] ref, + DBRefEntry entry) { return searchRefs(ref, entry, matchDbAndIdAndEitherMapOrEquivalentMapList); } /** - * Returns an array of those references that match the given accession id + * Returns a list of those references that match the given accession id * <ul> * <li>database sources are the same</li> * <li>accession ids are the same</li> * <li>both have no mapping, or the mappings are the same</li> * </ul> * - * @param ref + * @param refs * Set of references to search - * @param entry - * pattern to match + * @param accId + * accession id to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId) + public static List searchRefs(DBRefEntry[] refs, String accId) { - return searchRefs(ref, new DBRefEntry("", "", accId), matchId); + return searchRefs(refs, new DBRefEntry("", "", accId), matchId); } /** - * Returns an array of those references that match the given entry, according - * to the given comparator. Returns null if no matches. + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. * * @param refs * an array of database references to search @@ -201,14 +202,14 @@ public class DBRefUtils * @param comparator * @return */ - static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry, + static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, DbRefComp comparator) { + List rfs = new ArrayList(); if (refs == null || entry == null) { - return null; + return rfs; } - List rfs = new ArrayList(); for (int i = 0; i < refs.length; i++) { if (comparator.matches(entry, refs[i])) @@ -216,7 +217,7 @@ public class DBRefUtils rfs.add(refs[i]); } } - return rfs.size() == 0 ?
null : rfs.toArray(new DBRefEntry[rfs.size()]); + return rfs; } interface DbRefComp @@ -543,4 +544,29 @@ public class DBRefUtils // (not dna, not protein seq) } + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource(DBRefEntry[] dbRefs, + String source) + { + List matches = new ArrayList(); + if (dbRefs != null && source != null) + { + for (DBRefEntry dbref : dbRefs) + { + if (source.equals(dbref.getSource())) + { + matches.add(dbref); + } + } + } + return matches; + } + } diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 0a49f66..33a917e 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -125,20 +125,20 @@ public class ASequenceFetcher * if true, only fetch from nucleotide data sources, else peptide * @return */ - public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) + public SequenceI[] getSequences(List refs, boolean dna) { Vector rseqs = new Vector(); Hashtable> queries = new Hashtable>(); - for (int r = 0; r < refs.length; r++) + for (DBRefEntry ref : refs) { - if (!queries.containsKey(refs[r].getSource())) + if (!queries.containsKey(ref.getSource())) { - queries.put(refs[r].getSource(), new ArrayList()); + queries.put(ref.getSource(), new ArrayList()); } - List qset = queries.get(refs[r].getSource()); - if (!qset.contains(refs[r].getAccessionId())) + List qset = queries.get(ref.getSource()); + if (!qset.contains(ref.getAccessionId())) { - qset.add(refs[r].getAccessionId()); + qset.add(ref.getAccessionId()); } } Enumeration e = queries.keys(); @@ -205,15 +205,12 @@ public class ASequenceFetcher for (int is = 0; is < seqs.length; is++) { rseqs.addElement(seqs[is]); - DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is] + List frefs = DBRefUtils.searchRefs(seqs[is]
.getDBRefs(), new DBRefEntry(db, null, null)); - if (frefs != null) + for (DBRefEntry dbr : frefs) { - for (DBRefEntry dbr : frefs) - { - queriesFound.add(dbr.getAccessionId()); - queriesMade.remove(dbr.getAccessionId()); - } + queriesFound.add(dbr.getAccessionId()); + queriesMade.remove(dbr.getAccessionId()); } seqs[is] = null; } diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index 05b9e42..31f9728 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -401,7 +401,7 @@ public class CrossRefTest } @Override - public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) + public SequenceI[] getSequences(List refs, boolean dna) { return new SequenceI[] { pep1, pep2 }; } diff --git a/test/jalview/analysis/CrossRefsTest.java b/test/jalview/analysis/CrossRefsTest.java index f06cab0..cdcb184 100644 --- a/test/jalview/analysis/CrossRefsTest.java +++ b/test/jalview/analysis/CrossRefsTest.java @@ -99,10 +99,6 @@ public class CrossRefsTest emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); /* - * find EMBL xrefs for peptide sequence - it has no direct - * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned - */ - /* * Find EMBL xrefs for peptide * - it has no EMBL dbref of its own * - but nucleotide with matching peptide dbref does, so is returned diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java index 6c39b9b..96935ce 100644 --- a/test/jalview/util/DBRefUtilsTest.java +++ b/test/jalview/util/DBRefUtilsTest.java @@ -33,6 +33,8 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import java.util.List; + import org.testng.annotations.Test; public class DBRefUtilsTest @@ -191,12 +193,13 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new 
DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3, ref4, ref5 }, target); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + assertEquals(3, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); } /** @@ -224,11 +227,12 @@ public class DBRefUtilsTest new int[] { 1, 1 }, 2, 2)); ref3.setMap(map3); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3 }, target); - assertEquals(2, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); + assertEquals(2, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); } /** @@ -251,11 +255,11 @@ public class DBRefUtilsTest DBRefEntry[] dbrefs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5 }; - DBRefEntry[] matches = DBRefUtils.searchRefs(dbrefs, "A1234"); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + List matches = DBRefUtils.searchRefs(dbrefs, "A1234"); + assertEquals(3, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); } /** @@ -278,12 +282,13 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3, ref4, ref5 }, target); - assertEquals(4, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref3, matches[2]); - assertSame(ref5, matches[3]); + assertEquals(4, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref3, 
matches.get(2)); + assertSame(ref5, matches.get(3)); } } -- 1.7.10.2