* <p>Title: </p>\r
* SequenceIdMatcher\r
* <p>Description: </p>\r
- * Routine which does approximate Sequence Id resolution by name using string containment rather than equivalence\r
+ * Routine which does approximate Sequence Id resolution by name using\r
+ * string containment at the start of the name (prefix matching that must end on a space-delimited word boundary) rather than exact equivalence\r
* <p>Copyright: Copyright (c) 2004</p>\r
*\r
* <p>Company: Dundee University</p>\r
public SequenceIdMatcher(SequenceI[] seqs)\r
{\r
names = new Hashtable();\r
-\r
for (int i = 0; i < seqs.length; i++)\r
{\r
names.put(new SeqIdName(seqs[i].getName()), seqs[i]);\r
*/\r
SequenceI[] findIdMatch(SequenceI[] seqs)\r
{\r
- SequenceI[] namedseqs = new SequenceI[seqs.length];\r
-\r
+ SequenceI[] namedseqs = null;\r
int i = 0;\r
SeqIdName nam;\r
\r
if (seqs.length > 0)\r
{\r
+ namedseqs = new SequenceI[seqs.length];\r
do\r
{\r
nam = new SeqIdName(seqs[i].getName());\r
namedseqs[i] = null;\r
}\r
}\r
- while (i++ < seqs.length);\r
+ while (++i < seqs.length);\r
}\r
\r
return namedseqs;\r
return false;\r
}\r
\r
+ /**\r
+ * Approximate ID match: returns true when the two ids are identical, or when\r
+ * the shorter id is a prefix of the longer one and is followed immediately by\r
+ * a single space character (' '). Only the space character is tested as the\r
+ * boundary; other whitespace (e.g. a tab) does not count.\r
+ * TODO: (JBPNote) These are not efficient; should use char[] for speed.\r
+ * @param s the SeqIdName whose id is compared with this one's id\r
+ * @return true if the ids match under the rule above, false otherwise\r
+ */\r
public boolean equals(SeqIdName s)\r
{\r
- if (id.startsWith(s.id) || s.id.startsWith(id))\r
- {\r
- return true;\r
- }\r
-\r
- return false;\r
+ if (id.length()>s.id.length()) { /* this id is strictly longer, so s.id must be the prefix */\r
+ return id.startsWith(s.id) ?\r
+ (id.equals(s.id) ? true : id.startsWith(s.id+" ")) /* equals() cannot hold here (lengths differ); the space test decides */\r
+ : false;\r
+ } else /* s.id is at least as long: this id must be the prefix, or the two are equal */\r
+ return s.id.startsWith(id) ?\r
+ (s.id.equals(id) ? true : s.id.startsWith(id+" ")) /* equal ids match outright; otherwise a space must follow the prefix */\r
+ : false;\r
}\r
\r
public boolean equals(String s) /* true if id and s are identical, or the shorter is a prefix of the longer followed immediately by a single space */\r
{\r
- if (id.startsWith(s) || s.startsWith(id))\r
- {\r
- return true;\r
- }\r
-\r
- return false;\r
+ if (id.length()>s.length()) { /* id is strictly longer, so s must be the prefix */\r
+ return id.startsWith(s) ?\r
+ (id.equals(s) ? true : id.startsWith(s+" ")) /* equals() cannot hold here (lengths differ); the space test decides */\r
+ : false;\r
+ } else /* s is at least as long as id: id must be the prefix, or the two are equal */\r
+ return s.startsWith(id) ?\r
+ (s.equals(id) ? true : s.startsWith(id+" ")) /* equal strings match outright; otherwise a space must follow the prefix */\r
+ : false;\r
}\r
}\r
}\r