From 5a6113ce9fef6f59b895a40a69b55b120735c533 Mon Sep 17 00:00:00 2001 From: jprocter Date: Thu, 5 May 2005 13:15:25 +0000 Subject: [PATCH] Moved tree leafname to alignment set matching into SequenceIdMatcher class for use in other contexts. --- src/jalview/analysis/NJTree.java | 62 +++--------- src/jalview/analysis/SequenceIdMatcher.java | 137 +++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 49 deletions(-) create mode 100755 src/jalview/analysis/SequenceIdMatcher.java diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 8f3483d..7f4850e 100755 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -48,69 +48,33 @@ public class NJTree { maxheight = findHeight(top); } - // Private SequenceID class to do fuzzy .equals() method for Hashtable. - - private class SeqIdname { - String id; - - SeqIdname(String s) { - id = new String(s); - } - public int hashCode() { - return (id.substring(0,4).hashCode()); - } - public boolean equals(Object s) { - if (s instanceof SeqIdname) { - return this.equals((SeqIdname) s); - } else { - if (s instanceof String) { - return this.equals((String) s); - } - } - return false; - } - - - public boolean equals(SeqIdname s) { - if (id.startsWith(s.id) || s.id.startsWith(id)) { - return true; - } - return false; - } - - public boolean equals(String s) { - if (id.startsWith(s) || s.startsWith(id)) { - return true; - } - return false; - } - } public NJTree(SequenceI[] seqs, NewickFile treefile) { top = treefile.getTree(); maxheight = findHeight(top); - Hashtable names = new Hashtable(); - for (int i = 0; i < seqs.length; i++) - { - names.put(new SeqIdname(seqs[i].getDisplayId()), seqs[i]); - } + SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); + Vector leaves = new Vector(); findLeaves(top, leaves); + int i = 0; int namesleft = seqs.length; + SequenceNode j; - SeqIdname nam; + SequenceI nam; + String realnam; while (i < leaves.size()) { j = (SequenceNode) 
leaves.elementAt(i++); - nam = new SeqIdname(j.getName()); - if ((namesleft>-1) - && names.containsKey(nam)) - { - j.setElement(names.get(nam)); + realnam = j.getName(); + nam = null; + if (namesleft>-1) + nam = algnIds.findIdMatch(realnam); + if (nam != null) { + j.setElement(nam); namesleft--; } else { - j.setElement(new Sequence(nam.id, "THISISAPLACEHLDER")); + j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); } } } diff --git a/src/jalview/analysis/SequenceIdMatcher.java b/src/jalview/analysis/SequenceIdMatcher.java new file mode 100755 index 0000000..03528db --- /dev/null +++ b/src/jalview/analysis/SequenceIdMatcher.java @@ -0,0 +1,137 @@ +package jalview.analysis; + +import java.util.Vector; +import java.util.Hashtable; +import jalview.datamodel.SequenceI; + +/** + *

Title:

+ * SequenceIdMatcher + *

Description:

+ * Routine which does approximate Sequence Id resolution by name using prefix matching (either name may begin with the other) rather than exact equality + *

Copyright: Copyright (c) 2004

+ * + *

Company: Dundee University

+ * + * @author not attributable + * @version 1.0 + */ +public class SequenceIdMatcher +{ + + private class SeqIdName + { + String id; + + SeqIdName(String s) + { + id = new String(s); + } + + public int hashCode() + { + return (id.substring(0, 4).hashCode()); + } + + public boolean equals(Object s) + { + if (s instanceof SeqIdName) + { + return this.equals( (SeqIdName) s); + } + else + { + if (s instanceof String) + { + return this.equals( (String) s); + } + } + return false; + } + + public boolean equals(SeqIdName s) + { + if (id.startsWith(s.id) || s.id.startsWith(id)) + { + return true; + } + return false; + } + + public boolean equals(String s) + { + if (id.startsWith(s) || s.startsWith(id)) + { + return true; + } + return false; + } + } + + private Hashtable names; + + public SequenceIdMatcher(SequenceI[] seqs) + { + names = new Hashtable(); + for (int i = 0; i < seqs.length; i++) + { + names.put(new SeqIdName(seqs[i].getName()), seqs[i]); + } + } + + SequenceI findIdMatch(SequenceI seq) + { + SeqIdName nam = new SeqIdName(seq.getName()); + if (names.containsKey(nam)) + { + return (SequenceI) names.get(nam); + } + return null; + } + + SequenceI findIdMatch(String seqnam) + { + SeqIdName nam = new SeqIdName(seqnam); + if (names.containsKey(nam)) + { + return (SequenceI) names.get(nam); + } + return null; + } + + /** + * @method findIdMatch + * + * Return pointers to sequences (or sequence object containers) + * which have same Id as a given set of different sequence objects + * + * @param seqs SequenceI[] + * @return SequenceI[] + */ + + SequenceI[] findIdMatch(SequenceI[] seqs) + { + SequenceI[] namedseqs = new SequenceI[seqs.length]; + + int i = 0; + SeqIdName nam; + if (seqs.length > 0) + { + do + { + nam = new SeqIdName(seqs[i].getName()); + if (names.containsKey(nam)) + { + namedseqs[i] = (SequenceI) names.get(nam); + } + else + { + namedseqs[i] = null; + } + } + while (i++ < seqs.length); + } + return namedseqs; + } + +} -- 1.7.10.2