X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FSequenceIdMatcher.java;h=ed90422403ec00eb40940f1cc545ae7f4c0f651f;hb=174230b4233d9ce80f94527768d2cd2f76da11ab;hp=b88a03f0636d71f8456a905a1c3379e9041b049b;hpb=df3d63b97cc55ee0e7d078d050775f7f58ab3c42;p=jalview.git diff --git a/src/jalview/analysis/SequenceIdMatcher.java b/src/jalview/analysis/SequenceIdMatcher.java index b88a03f..ed90422 100755 --- a/src/jalview/analysis/SequenceIdMatcher.java +++ b/src/jalview/analysis/SequenceIdMatcher.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -115,12 +115,15 @@ public class SequenceIdMatcher SeqIdName(String s) { - id = new String(s); + if (s!=null) + id = new String(s); + else + id = ""; } public int hashCode() { - return (id.substring(0, 4).hashCode()); + return ((id.length()>=4) ? id.substring(0, 4).hashCode() : id.hashCode()); } public boolean equals(Object s) @@ -141,20 +144,30 @@ public class SequenceIdMatcher } /** - * matches if one ID properly contains another at a whitespace boundary. - * TODO: (JBPNote) These are not efficient. should use char[] for speed - * @param s SeqIdName - * @return boolean + * Characters that define the end of a unique sequence ID at + * the beginning of an arbitrary ID string + * JBPNote: This is a heuristic that will fail for arbritrarily extended sequence id's + * (like portions of an aligned set of repeats from one sequence) */ + private String WORD_SEP="~. |#\\/<>!\"£$%^*)}[@',?"; + + /** + * matches if one ID properly contains another at a whitespace boundary. + * TODO: (JBPNote) These are not efficient. should use char[] for speed + * todo: (JBPNote) Set separator characters appropriately + * @param s SeqIdName + * @return boolean + */ public boolean equals(SeqIdName s) { if (id.length()>s.id.length()) { return id.startsWith(s.id) ? - (id.equals(s.id) ? true : id.startsWith(s.id+" ")) + (WORD_SEP.indexOf(id.charAt(s.id.length()))>-1) : false; } else return s.id.startsWith(id) ? - (s.id.equals(id) ? true : s.id.startsWith(id+" ")) + (s.id.equals(id) ? true : + (WORD_SEP.indexOf(s.id.charAt(id.length()))>-1)) : false; } @@ -162,11 +175,12 @@ public class SequenceIdMatcher { if (id.length()>s.length()) { return id.startsWith(s) ? - (id.equals(s) ? true : id.startsWith(s+" ")) + (WORD_SEP.indexOf(id.charAt(s.length()))>-1) : false; } else return s.startsWith(id) ? - (s.equals(id) ? true : s.startsWith(id+" ")) + (s.equals(id) ? true : + (WORD_SEP.indexOf(s.charAt(id.length()))>-1)) : false; } }