2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
23 import jalview.datamodel.*;
28 * <p>Description: </p>
29 * Routine which does approximate Sequence Id resolution by name using
30 * string containment (on word boundaries) rather than equivalence
31 * <p>Copyright: Copyright (c) 2004</p>
33 * <p>Company: Dundee University</p>
35 * @author not attributable
38 public class SequenceIdMatcher
40 private Hashtable names;
42 public SequenceIdMatcher(SequenceI[] seqs)
44 names = new Hashtable();
45 for (int i = 0; i < seqs.length; i++)
47 names.put(new SeqIdName(seqs[i].getName()), seqs[i]);
51 SequenceI findIdMatch(SequenceI seq)
53 SeqIdName nam = new SeqIdName(seq.getName());
55 if (names.containsKey(nam))
57 return (SequenceI) names.get(nam);
63 SequenceI findIdMatch(String seqnam)
65 SeqIdName nam = new SeqIdName(seqnam);
67 if (names.containsKey(nam))
69 return (SequenceI) names.get(nam);
78 * Return pointers to sequences (or sequence object containers)
79 * which have same Id as a given set of different sequence objects
81 * @param seqs SequenceI[]
84 SequenceI[] findIdMatch(SequenceI[] seqs)
86 SequenceI[] namedseqs = null;
92 namedseqs = new SequenceI[seqs.length];
95 nam = new SeqIdName(seqs[i].getName());
97 if (names.containsKey(nam))
99 namedseqs[i] = (SequenceI) names.get(nam);
106 while (++i < seqs.length);
112 private class SeqIdName
124 public int hashCode()
126 return ((id.length()>=4) ? id.substring(0, 4).hashCode() : id.hashCode());
129 public boolean equals(Object s)
131 if (s instanceof SeqIdName)
133 return this.equals( (SeqIdName) s);
137 if (s instanceof String)
139 return this.equals( (String) s);
147 * Characters that define the end of a unique sequence ID at
148 * the beginning of an arbitrary ID string
149 * JBPNote: This is a heuristic that will fail for arbritrarily extended sequence id's
150 * (like portions of an aligned set of repeats from one sequence)
152 private String WORD_SEP="~. |#\\/<>!\"£$%^*)}[@',?";
155 * matches if one ID properly contains another at a whitespace boundary.
156 * TODO: (JBPNote) These are not efficient. should use char[] for speed
157 * todo: (JBPNote) Set separator characters appropriately
161 public boolean equals(SeqIdName s)
163 if (id.length()>s.id.length()) {
164 return id.startsWith(s.id) ?
165 (WORD_SEP.indexOf(id.charAt(s.id.length()))>-1)
168 return s.id.startsWith(id) ?
169 (s.id.equals(id) ? true :
170 (WORD_SEP.indexOf(s.id.charAt(id.length()))>-1))
174 public boolean equals(String s)
176 if (id.length()>s.length()) {
177 return id.startsWith(s) ?
178 (WORD_SEP.indexOf(id.charAt(s.length()))>-1)
181 return s.startsWith(id) ?
182 (s.equals(id) ? true :
183 (WORD_SEP.indexOf(s.charAt(id.length()))>-1))