From f5db0ff259cbb1132f6c061615d912b0668fd728 Mon Sep 17 00:00:00 2001 From: jprocter Date: Thu, 21 Jul 2011 17:06:47 +0100 Subject: [PATCH] retrieve all sequences matching a particular name (JAL-872) --- src/MCview/Atom.java | 3 ++ src/MCview/PDBChain.java | 8 +++- src/jalview/analysis/SequenceIdMatcher.java | 65 +++++++++++++++++++++++++-- src/jalview/gui/AlignFrame.java | 10 +++-- 4 files changed, 76 insertions(+), 10 deletions(-) diff --git a/src/MCview/Atom.java b/src/MCview/Atom.java index 0a3bd39..25a3dc6 100755 --- a/src/MCview/Atom.java +++ b/src/MCview/Atom.java @@ -45,6 +45,9 @@ public class Atom public String chain; + /** + * this is a temporary value - designed to store the position in sequence that this atom corresponds to after aligning the chain to a SequenceI object. Do not rely on its value being correct when visualizing sequence colourings on the structure - use the StructureSelectionManager's mapping instead. + */ public int alignmentMapping = -1; public int atomIndex; diff --git a/src/MCview/PDBChain.java b/src/MCview/PDBChain.java index d5597de..65504e4 100755 --- a/src/MCview/PDBChain.java +++ b/src/MCview/PDBChain.java @@ -99,7 +99,7 @@ public class PDBChain /** * Annotate the residues with their corresponding positions in s1 using the * alignment in as - * + * NOTE: This clears all atom.alignmentMapping values on the structure. * @param as * @param s1 */ @@ -107,7 +107,11 @@ public class PDBChain { int pdbpos = as.getSeq2Start() - 2; int alignpos = s1.getStart() + as.getSeq1Start() - 3; - + // first clear out any old alignmentMapping values: + for (Atom atom: (Vector) atoms) { + atom.alignmentMapping=-1; + } + // and now trace the alignment onto the atom set. 
for (int i = 0; i < as.astr1.length(); i++) { if (as.astr1.charAt(i) != '-') diff --git a/src/jalview/analysis/SequenceIdMatcher.java b/src/jalview/analysis/SequenceIdMatcher.java index 71b2754..5bde225 100755 --- a/src/jalview/analysis/SequenceIdMatcher.java +++ b/src/jalview/analysis/SequenceIdMatcher.java @@ -53,7 +53,8 @@ public class SequenceIdMatcher names = new Hashtable(); for (int i = 0; i < seqs.length; i++) { - names.put(new SeqIdName(seqs[i].getName()), seqs[i]); + // TODO: deal with ID collisions - SequenceI should be appended to list associated with this key. + names.put(new SeqIdName(seqs[i].getDisplayId(true)), seqs[i]); // add in any interesting identifiers if (seqs[i].getDBRef() != null) { @@ -83,6 +84,22 @@ public class SequenceIdMatcher */ private SequenceI pickbestMatch(SeqIdName candName, Vector matches) { + SequenceI[] st= pickbestMatches(candName, matches); + return st==null || st.length==0 ? null : st[0]; + } + /** + * returns the closest SequenceI in matches to SeqIdName and returns all the + * matches to the names hash. + * + * @param candName + * SeqIdName + * @param matches + * Vector of SequenceI objects + * @return SequenceI[] the closest match(es) to SeqIdName, including any ties, or null if there are none + */ + private SequenceI[] pickbestMatches(SeqIdName candName, Vector matches) + { + ArrayList best=new ArrayList(); SequenceI match = null; if (candName == null || matches == null || matches.size() == 0) { @@ -90,6 +107,7 @@ public class SequenceIdMatcher } match = (SequenceI) matches.elementAt(0); matches.removeElementAt(0); + best.add(match); names.put(new SeqIdName(match.getName()), match); int matchlen = match.getName().length(); int namlen = candName.id.length(); @@ -97,17 +115,26 @@ public class SequenceIdMatcher { // look through for a better one. 
SequenceI cand = (SequenceI) matches.elementAt(0); + matches.remove(0); names.put(new SeqIdName(cand.getName()), cand); - int candlen = cand.getName().length(); + int q,w,candlen = cand.getName().length(); // keep the one with an id 'closer' to the given seqnam string - if (Math.abs(matchlen - namlen) > Math.abs(candlen - namlen) + if ((q=Math.abs(matchlen - namlen)) > (w=Math.abs(candlen - namlen)) && candlen > matchlen) { + best.clear(); match = cand; matchlen = candlen; + best.add(match); + } + if (q==w && candlen==matchlen) + { + // record any ties + best.add(cand); } } - return match; + if (best.size()==0) { return null; }; + return (SequenceI[]) best.toArray(new SequenceI[0]); } /** @@ -130,6 +157,17 @@ public class SequenceIdMatcher } /** + * Find all matches for a given sequence name. + * @param seqnam string to query Matcher with. + */ + public SequenceI[] findAllIdMatches(String seqnam) + { + + SeqIdName nam = new SeqIdName(seqnam); + return findAllIdMatches(nam); + } + + /** * findIdMatch * * Return pointers to sequences (or sequence object containers) which have @@ -183,6 +221,25 @@ public class SequenceIdMatcher } return pickbestMatch(nam, matches); } + /** + * core findIdMatch search method for finding all equivalent matches + * + * @param nam + * SeqIdName + * @return SequenceI[] + */ + private SequenceI[] findAllIdMatches( + jalview.analysis.SequenceIdMatcher.SeqIdName nam) + { + Vector matches = new Vector(); + while (names.containsKey(nam)) + { + matches.addElement(names.remove(nam)); + } + SequenceI[] r=pickbestMatches(nam, matches); + return r; + } + private class SeqIdName { diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index b658ffb..06272c3 100755 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4513,7 +4513,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, if (pdbfn.length() > 0) { // attempt to find a match in the alignment - SequenceI mtch = 
idm.findIdMatch(pdbfn); + SequenceI[] mtch = idm.findAllIdMatches(pdbfn); int l = 0, c = pdbfn.indexOf("."); while (mtch == null && c != -1) { @@ -4525,7 +4525,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { pdbfn = pdbfn.substring(0, l); } - mtch = idm.findIdMatch(pdbfn); + mtch = idm.findAllIdMatches(pdbfn); } if (mtch != null) { @@ -4570,17 +4570,19 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, // try and associate // TODO: may want to set a standard ID naming formalism for // associating PDB files which have no IDs. + for (SequenceI toassoc: (SequenceI[])fm[2]) { PDBEntry pe = new AssociatePdbFileWithSeq() .associatePdbWithSeq((String) fm[0], (String) fm[1], - (SequenceI) fm[2], false); + toassoc, false); if (pe != null) { System.err .println("Associated file : " + ((String) fm[0]) + " with " - + ((SequenceI) fm[2]).getDisplayId(true)); + + toassoc.getDisplayId(true)); assocfiles++; } + } alignPanel.paintAlignment(true); } } -- 1.7.10.2