-package jalview.analysis;
-
-import java.util.Vector;
-import java.util.Hashtable;
-import jalview.datamodel.SequenceI;
-
-/**
- * <p>Title: </p>
- * SequenceIdMatcher
- * <p>Description: </p>
- * Routine which does approximate Sequence Id resolution by name using string containment rather than equivalence
- * <p>Copyright: Copyright (c) 2004</p>
- *
- * <p>Company: Dundee University</p>
- *
- * @author not attributable
- * @version 1.0
- */
-public class SequenceIdMatcher
-{
-
- private class SeqIdName
- {
- String id;
-
- SeqIdName(String s)
- {
- id = new String(s);
- }
-
- public int hashCode()
- {
- return (id.substring(0, 4).hashCode());
- }
-
- public boolean equals(Object s)
- {
- if (s instanceof SeqIdName)
- {
- return this.equals( (SeqIdName) s);
- }
- else
- {
- if (s instanceof String)
- {
- return this.equals( (String) s);
- }
- }
- return false;
- }
-
- public boolean equals(SeqIdName s)
- {
- if (id.startsWith(s.id) || s.id.startsWith(id))
- {
- return true;
- }
- return false;
- }
-
- public boolean equals(String s)
- {
- if (id.startsWith(s) || s.startsWith(id))
- {
- return true;
- }
- return false;
- }
- }
-
- private Hashtable names;
-
- public SequenceIdMatcher(SequenceI[] seqs)
- {
- names = new Hashtable();
- for (int i = 0; i < seqs.length; i++)
- {
- names.put(new SeqIdName(seqs[i].getName()), seqs[i]);
- }
- }
-
- SequenceI findIdMatch(SequenceI seq)
- {
- SeqIdName nam = new SeqIdName(seq.getName());
- if (names.containsKey(nam))
- {
- return (SequenceI) names.get(nam);
- }
- return null;
- }
-
- SequenceI findIdMatch(String seqnam)
- {
- SeqIdName nam = new SeqIdName(seqnam);
- if (names.containsKey(nam))
- {
- return (SequenceI) names.get(nam);
- }
- return null;
- }
-
- /**
- * @method findIdMatch
- *
- * Return pointers to sequences (or sequence object containers)
- * which have same Id as a given set of different sequence objects
- *
- * @param seqs SequenceI[]
- * @return SequenceI[]
- */
-
- SequenceI[] findIdMatch(SequenceI[] seqs)
- {
- SequenceI[] namedseqs = new SequenceI[seqs.length];
-
- int i = 0;
- SeqIdName nam;
- if (seqs.length > 0)
- {
- do
- {
- nam = new SeqIdName(seqs[i].getName());
- if (names.containsKey(nam))
- {
- namedseqs[i] = (SequenceI) names.get(nam);
- }
- else
- {
- namedseqs[i] = null;
- }
- }
- while (i++ < seqs.length);
- }
- return namedseqs;
- }
-
-}
+/*\r
+ * Jalview - A Sequence Alignment Editor and Viewer\r
+ * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
+ *\r
+ * This program is free software; you can redistribute it and/or\r
+ * modify it under the terms of the GNU General Public License\r
+ * as published by the Free Software Foundation; either version 2\r
+ * of the License, or (at your option) any later version.\r
+ *\r
+ * This program is distributed in the hope that it will be useful,\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * GNU General Public License for more details.\r
+ *\r
+ * You should have received a copy of the GNU General Public License\r
+ * along with this program; if not, write to the Free Software\r
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+ */\r
+package jalview.analysis;\r
+\r
+import java.util.*;\r
+\r
+import jalview.datamodel.*;\r
+\r
+/**\r
+ * <p>Title: </p>\r
+ * SequenceIdMatcher\r
+ * <p>Description: </p>\r
+ * Routine which does approximate Sequence Id resolution by name using\r
+ * prefix matching (the shorter id must end where the longer one has a separator character) rather than exact equality\r
+ * <p>Copyright: Copyright (c) 2004</p>\r
+ *\r
+ * <p>Company: Dundee University</p>\r
+ *\r
+ * @author not attributable\r
+ * @version 1.0\r
+ */\r
+public class SequenceIdMatcher\r
+{\r
+ private Hashtable names;\r
+\r
+ public SequenceIdMatcher(SequenceI[] seqs)\r
+ {\r
+ names = new Hashtable();\r
+ for (int i = 0; i < seqs.length; i++)\r
+ {\r
+ names.put(new SeqIdName(seqs[i].getName()), seqs[i]);\r
+ }\r
+ }\r
+\r
+ SequenceI findIdMatch(SequenceI seq)\r
+ {\r
+ SeqIdName nam = new SeqIdName(seq.getName());\r
+\r
+ if (names.containsKey(nam))\r
+ {\r
+ return (SequenceI) names.get(nam);\r
+ }\r
+\r
+ return null;\r
+ }\r
+\r
+ SequenceI findIdMatch(String seqnam)\r
+ {\r
+ SeqIdName nam = new SeqIdName(seqnam);\r
+\r
+ if (names.containsKey(nam))\r
+ {\r
+ return (SequenceI) names.get(nam);\r
+ }\r
+\r
+ return null;\r
+ }\r
+\r
+ /**\r
+ * findIdMatch\r
+ *\r
+ * Return pointers to sequences (or sequence object containers)\r
+ * which have same Id as a given set of different sequence objects\r
+ *\r
+ * @param seqs SequenceI[]\r
+ * @return SequenceI[]\r
+ */\r
+ SequenceI[] findIdMatch(SequenceI[] seqs)\r
+ {\r
+ SequenceI[] namedseqs = null;\r
+ int i = 0;\r
+ SeqIdName nam;\r
+\r
+ if (seqs.length > 0)\r
+ {\r
+ namedseqs = new SequenceI[seqs.length];\r
+ do\r
+ {\r
+ nam = new SeqIdName(seqs[i].getName());\r
+\r
+ if (names.containsKey(nam))\r
+ {\r
+ namedseqs[i] = (SequenceI) names.get(nam);\r
+ }\r
+ else\r
+ {\r
+ namedseqs[i] = null;\r
+ }\r
+ }\r
+ while (++i < seqs.length);\r
+ }\r
+\r
+ return namedseqs;\r
+ }\r
+\r
+ private class SeqIdName\r
+ {\r
+ String id;\r
+\r
+ SeqIdName(String s)\r
+ {\r
+ if (s!=null)\r
+ id = new String(s);\r
+ else\r
+ id = "";\r
+ }\r
+\r
+ public int hashCode()\r
+ {\r
+ return ((id.length()>=4) ? id.substring(0, 4).hashCode() : id.hashCode());\r
+ }\r
+\r
+ public boolean equals(Object s)\r
+ {\r
+ if (s instanceof SeqIdName)\r
+ {\r
+ return this.equals( (SeqIdName) s);\r
+ }\r
+ else\r
+ {\r
+ if (s instanceof String)\r
+ {\r
+ return this.equals( (String) s);\r
+ }\r
+ }\r
+\r
+ return false;\r
+ }\r
+\r
+ /**\r
+ * Characters that define the end of a unique sequence ID at\r
+ * the beginning of an arbitrary ID string\r
+ * JBPNote: This is a heuristic that will fail for arbritrarily extended sequence id's\r
+ * (like portions of an aligned set of repeats from one sequence)\r
+ */\r
+ private String WORD_SEP="~. |#\\/<>!\"£$%^*)}[@',?";\r
+\r
+ /**\r
+ * matches if one ID properly contains another at a whitespace boundary.\r
+ * TODO: (JBPNote) These are not efficient. should use char[] for speed\r
+ * todo: (JBPNote) Set separator characters appropriately\r
+ * @param s SeqIdName\r
+ * @return boolean\r
+ */\r
+ public boolean equals(SeqIdName s)\r
+ {\r
+ if (id.length()>s.id.length()) {\r
+ return id.startsWith(s.id) ?\r
+ (WORD_SEP.indexOf(id.charAt(s.id.length()))>-1)\r
+ : false;\r
+ } else\r
+ return s.id.startsWith(id) ?\r
+ (s.id.equals(id) ? true :\r
+ (WORD_SEP.indexOf(s.id.charAt(id.length()))>-1))\r
+ : false;\r
+ }\r
+\r
+ public boolean equals(String s)\r
+ {\r
+ if (id.length()>s.length()) {\r
+ return id.startsWith(s) ?\r
+ (WORD_SEP.indexOf(id.charAt(s.length()))>-1)\r
+ : false;\r
+ } else\r
+ return s.startsWith(id) ?\r
+ (s.equals(id) ? true :\r
+ (WORD_SEP.indexOf(s.charAt(id.length()))>-1))\r
+ : false;\r
+ }\r
+ }\r
+}\r