+/*\r
+ * Jalview - A Sequence Alignment Editor and Viewer\r
+ * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
+ *\r
+ * This program is free software; you can redistribute it and/or\r
+ * modify it under the terms of the GNU General Public License\r
+ * as published by the Free Software Foundation; either version 2\r
+ * of the License, or (at your option) any later version.\r
+ *\r
+ * This program is distributed in the hope that it will be useful,\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * GNU General Public License for more details.\r
+ *\r
+ * You should have received a copy of the GNU General Public License\r
+ * along with this program; if not, write to the Free Software\r
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+ */\r
package jalview.util;\r
\r
import jalview.datamodel.*;\r
\r
-public class Comparison {\r
-\r
- public static float compare(SequenceI ii, SequenceI jj)\r
+/**\r
+ * Static helper methods for comparing sequences: percentage identity\r
+ * calculations (compare and PID), a gap-character test (isGap) and a\r
+ * nucleotide-versus-protein guess (isNucleotide).\r
+ *\r
+ * @author $author$\r
+ * @version $Revision$\r
+ */\r
+public class Comparison\r
+{\r
+ /**\r
+ * The gap characters as a single string: space, '.' and '-'. These are the\r
+ * same three characters that isGap(char) tests for.\r
+ */\r
+ public static final String GapChars = " .-";\r
+\r
+  /**\r
+   * Compares two sequences by delegating to\r
+   * {@link #compare(SequenceI, SequenceI, int, int)} with a start of 0 and an\r
+   * end of <code>ii.getLength() - 1</code>.\r
+   *\r
+   * @param ii first sequence; its length supplies the end position\r
+   * @param jj second sequence\r
+   *\r
+   * @return the percentage value computed by the four-argument overload\r
+   */\r
+  public static final float compare(SequenceI ii, SequenceI jj)\r
{\r
- return Comparison.compare(ii,jj,0,ii.getLength()-1);\r
+    final int lastColumn = ii.getLength() - 1;\r
+\r
+    return compare(ii, jj, 0, lastColumn);\r
}\r
- public static float compare(SequenceI ii, SequenceI jj, int start, int end) {\r
-\r
- String si = ii.getSequence();\r
- String sj = jj.getSequence();\r
-\r
- int ilen = si.length()-1;\r
- int jlen = sj.length()-1;\r
-\r
- while (jalview.util.Comparison.isGap(si.charAt(start + ilen)))\r
- {\r
- ilen--;\r
- }\r
-\r
- while (jalview.util.Comparison.isGap(sj.charAt(start + jlen)))\r
- {\r
- jlen--;\r
- }\r
-\r
- int count = 0;\r
- int match = 0;\r
- float pid = -1;\r
-\r
- if (ilen > jlen) {\r
-\r
- for (int j = 0; j < jlen; j++) {\r
- if (si.substring(start + j,start + j+1).equals(sj.substring(start + j,start + j+1))) {\r
- match++;\r
- }\r
- count++;\r
- }\r
- pid = (float)match/(float)ilen * 100;\r
- } else {\r
- for (int j = 0; j < jlen; j++) {\r
- if (si.substring(start + j,start + j+1).equals(sj.substring(start + j,start + j+1))) {\r
- match++;\r
- }\r
- count++;\r
- }\r
- pid = (float)match/(float)jlen * 100;\r
- }\r
+\r
+  /**\r
+   * Percentage identity between two sequences, measured from column\r
+   * <code>start</code>. Trailing gap characters are trimmed from both\r
+   * sequences, the remaining columns are compared character by character, and\r
+   * the match count is divided by the larger of the two trimmed extents.\r
+   * Gap characters inside the compared region are compared like any other\r
+   * character, so this is not an ungapped calculation.\r
+   *\r
+   * NOTE(review): the extents are offsets of the last non-gap character, not\r
+   * lengths, so the column holding the last residue is never compared and the\r
+   * divisor is one less than the number of columns. That is kept as it was so\r
+   * that existing scores do not shift - confirm whether it is intended.\r
+   *\r
+   * @param ii first sequence\r
+   * @param jj second sequence\r
+   * @param start column at which the comparison begins\r
+   * @param end not used by this implementation\r
+   *\r
+   * @return percentage of matching columns, or 0 when there is nothing to\r
+   *         compare\r
+   */\r
+  public static float compare(SequenceI ii, SequenceI jj, int start, int end)\r
+  {\r
+    String si = ii.getSequenceAsString();\r
+    String sj = jj.getSequenceAsString();\r
+\r
+    // Offsets, relative to start, of the last non-gap character. Measuring\r
+    // from start and stopping at -1 keeps charAt inside the string when start\r
+    // is non-zero or a sequence holds nothing but gaps.\r
+    int ilen = si.length() - 1 - start;\r
+    int jlen = sj.length() - 1 - start;\r
+\r
+    while (ilen >= 0 && isGap(si.charAt(start + ilen)))\r
+    {\r
+      ilen--;\r
+    }\r
+\r
+    while (jlen >= 0 && isGap(sj.charAt(start + jlen)))\r
+    {\r
+      jlen--;\r
+    }\r
+\r
+    int longest = Math.max(ilen, jlen);\r
+    float pid = 0;\r
+\r
+    // A divisor of zero or less means there is nothing to score.\r
+    if (longest > 0)\r
+    {\r
+      // Never read past the end of ii when jj is the longer sequence.\r
+      int limit = Math.min(jlen, si.length() - start);\r
+      int match = 0;\r
+\r
+      for (int j = 0; j < limit; j++)\r
+      {\r
+        if (si.charAt(start + j) == sj.charAt(start + j))\r
+        {\r
+          match++;\r
+        }\r
+      }\r
+\r
+      pid = (float) match / (float) longest * 100;\r
+    }\r
\r
return pid;\r
}\r
\r
- /** */\r
- public static float PID(Sequence s1 , Sequence s2)\r
+  /**\r
+   * Gapped percentage identity of two sequence strings. Delegates to\r
+   * {@link #PID(String, String, int, int)} with a start of 0 and an end of\r
+   * <code>seq1.length()</code>.\r
+   *\r
+   * @param seq1 first sequence string; its length supplies the end position\r
+   * @param seq2 second sequence string\r
+   * @return the percentage computed by the four-argument overload\r
+   */\r
+  public final static float PID(String seq1, String seq2)\r
{\r
- int len;\r
+    final int wholeLength = seq1.length();\r
+\r
+    return PID(seq1, seq2, 0, wholeLength);\r
+  }\r
\r
- if (s1.getSequence().length() > s2.getSequence().length())\r
- len = s1.getSequence().length();\r
- else\r
- len = s2.getSequence().length();\r
+ static final int caseShift = 'a' - 'A'; /* distance between an ASCII lower-case letter and its upper-case form; subtracted from a char in 'a'..'z' to upper-case it */\r
\r
+  /**\r
+   * Gapped percentage identity over a region of two sequence strings.\r
+   * Characters are compared without regard to ASCII case. A column counts as\r
+   * a mismatch only when the two characters differ and neither is a gap;\r
+   * columns containing a gap still count towards the divisor.\r
+   *\r
+   * @param seq1 first sequence string\r
+   * @param seq2 second sequence string\r
+   * @param start index of the first column compared; negative values are\r
+   *          treated as 0, and a start beyond the region end is moved back so\r
+   *          that only the last column is compared\r
+   * @param end index one past the last column compared; limited to the\r
+   *          length of the shorter string\r
+   * @return percentage of compared columns that are not mismatches, or 0\r
+   *         when the region holds no columns\r
+   */\r
+  public final static float PID(String seq1, String seq2, int start, int end)\r
+  {\r
\r
- int bad = 0;\r
+    int len = Math.min(seq1.length(), seq2.length());\r
\r
- for (int i = 0; i < len; i++)\r
+    if (end < len)\r
{\r
- char chr1;\r
- char chr2;\r
+      len = end;\r
+    }\r
+\r
+    if (len < start)\r
+    {\r
+      start = len - 1; // we just use a single residue for the difference\r
+    }\r
+\r
+    if (start < 0)\r
+    {\r
+      start = 0; // keeps charAt in range when the region is empty\r
+    }\r
\r
- if (i < s1.getSequence().length())\r
- chr1 = s1.getSequence().charAt(i);\r
- else\r
- chr1 = '.';\r
+    // Score against the columns actually compared rather than the absolute\r
+    // end index; this also avoids dividing by zero for an empty region.\r
+    int regionLen = len - start;\r
+\r
+    if (regionLen < 1)\r
+    {\r
+      return 0f;\r
+    }\r
\r
+    int bad = 0;\r
+\r
+    for (int i = start; i < len; i++)\r
+    {\r
+      char chr1 = seq1.charAt(i);\r
\r
- if (i < s2.getSequence().length())\r
- chr2 = s2.getSequence().charAt(i);\r
- else\r
- chr2 = '.';\r
+      char chr2 = seq2.charAt(i);\r
\r
+      // ASCII upper-casing by subtraction, used here in place of toUpperCase\r
+      if ('a' <= chr1 && chr1 <= 'z')\r
+      {\r
+        chr1 -= caseShift;\r
+      }\r
+\r
+      if ('a' <= chr2 && chr2 <= 'z')\r
+      {\r
+        chr2 -= caseShift;\r
+      }\r
\r
- if (!(jalview.util.Comparison.isGap( chr1 )) && !(jalview.util.Comparison.isGap( chr2 )))\r
+      if (chr1 != chr2 && !isGap(chr1) && !isGap(chr2))\r
{\r
- if (chr1!=chr2)\r
- bad++;\r
+        bad++;\r
}\r
}\r
\r
- return (float)100*(len-bad)/len;\r
+    return (100f * (regionLen - bad)) / regionLen;\r
}\r
\r
- public static boolean isGap(char c)\r
+  /**\r
+   * Tests whether a character is one of the gap symbols '-', '.' or ' '.\r
+   *\r
+   * @param c character to test\r
+   *\r
+   * @return true if <code>c</code> is a gap symbol, false otherwise\r
+   */\r
+  public static final boolean isGap(char c)\r
{\r
- return (c != '.' && c != '-' && c != ' ') ? false : true;\r
+    return c == '-' || c == '.' || c == ' ';\r
+  }\r
+\r
+  /**\r
+   * Guesses whether a set of sequences is nucleotide rather than protein.\r
+   * Each character is upper-cased (ASCII only) and counted as nucleotide if\r
+   * it is A, G, C, T or U; any other character that is not a gap is counted\r
+   * against. The set is reported as nucleotide when more than 85% of the\r
+   * counted characters are nucleotide.\r
+   *\r
+   * @param seqs sequences to examine\r
+   * @return true if the nucleotide fraction exceeds 0.85; false otherwise,\r
+   *         including when no non-gap characters are present\r
+   */\r
+  public static final boolean isNucleotide(SequenceI[] seqs)\r
+  {\r
+    // Integer counters: a float stops incrementing once it reaches 2^24,\r
+    // which a large alignment can exceed.\r
+    long nt = 0;\r
+    long aa = 0;\r
+\r
+    for (int i = 0; i < seqs.length; i++)\r
+    {\r
+      int jSize = seqs[i].getLength();\r
+\r
+      for (int j = 0; j < jSize; j++)\r
+      {\r
+        char c = seqs[i].getCharAt(j);\r
+\r
+        if ('a' <= c && c <= 'z')\r
+        {\r
+          c -= caseShift;\r
+        }\r
+\r
+        if (c == 'A' || c == 'G' || c == 'C' || c == 'T' || c == 'U')\r
+        {\r
+          nt++;\r
+        }\r
+        else if (!isGap(c)) // upper-casing never turns a letter into a gap\r
+        {\r
+          aa++;\r
+        }\r
+      }\r
+    }\r
+\r
+    long total = nt + aa;\r
+\r
+    // With nothing counted the ratio would be 0/0; report false, which is\r
+    // also what the NaN comparison produced before.\r
+    return total > 0 && ((float) nt / (float) total) > 0.85f;\r
}\r
}\r