2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import java.util.Locale;
25 import jalview.analysis.scoremodels.PIDModel;
26 import jalview.analysis.scoremodels.ScoreMatrix;
27 import jalview.analysis.scoremodels.ScoreModels;
28 import jalview.analysis.scoremodels.SimilarityParams;
29 import jalview.datamodel.AlignmentAnnotation;
30 import jalview.datamodel.AlignmentI;
31 import jalview.datamodel.Mapping;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceI;
34 import jalview.math.MiscMath;
35 import jalview.util.Comparison;
36 import jalview.util.Format;
37 import jalview.util.MapList;
38 import jalview.util.MessageManager;
40 import java.awt.Color;
41 import java.awt.Graphics;
42 import java.io.PrintStream;
43 import java.lang.IllegalArgumentException;
44 import java.util.ArrayList;
45 import java.util.Arrays;
46 import java.util.HashMap;
47 import java.util.List;
48 import java.util.StringTokenizer;
58 private static final int MAX_NAME_LENGTH = 30;
61 //private static final int GAP_OPEN_COST = 120;
62 private static final int GAP_OPEN_COST = 10;
64 //private static final int GAP_EXTEND_COST = 20;
65 private static final float GAP_EXTEND_COST = 0.5f;
67 private static final int GAP_INDEX = -1;
69 public static final String PEP = "pep";
71 public static final String DNA = "dna";
73 private static final String NEWLINE = System.lineSeparator();
83 int[][] traceback; // todo is this actually used?
105 public String astr1 = "";
107 public String astr2 = "";
109 public String indelfreeAstr1 = "";
111 public String indelfreeAstr2 = "";
114 public int seq1start;
120 public int seq2start;
126 public float maxscore;
128 public float meanScore; //needed for PaSiMap
130 public float hypotheticMaxScore; // needed for PaSiMap
134 StringBuffer output = new StringBuffer();
136 String type; // AlignSeq.PEP or AlignSeq.DNA
138 private ScoreMatrix scoreMatrix;
141 * Creates a new AlignSeq object.
144 * first sequence for alignment
146 * second sequence for alignment
148 * molecule type, either AlignSeq.PEP or AlignSeq.DNA
150 public AlignSeq(SequenceI s1, SequenceI s2, String type)
152 seqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(),
157 * Creates a new AlignSeq object.
166 public AlignSeq(SequenceI s1, String string1, SequenceI s2,
167 String string2, String type)
169 seqInit(s1, string1.toUpperCase(Locale.ROOT), s2,
170 string2.toUpperCase(Locale.ROOT), type);
176 * @return DOCUMENT ME!
178 public float getMaxScore()
184 * returns the overall score of the alignment
188 public float getAlignmentScore()
190 return alignmentScore;
196 * @return DOCUMENT ME!
198 public int getSeq2Start()
206 * @return DOCUMENT ME!
208 public int getSeq2End()
216 * @return DOCUMENT ME!
218 public int getSeq1Start()
226 * @return DOCUMENT ME!
228 public int getSeq1End()
236 * @return DOCUMENT ME!
238 public String getOutput()
240 return output.toString();
246 * @return DOCUMENT ME!
248 public String getAStr1()
256 * @return DOCUMENT ME!
258 public String getAStr2()
266 * @return DOCUMENT ME!
268 public int[] getASeq1()
276 * @return DOCUMENT ME!
278 public int[] getASeq2()
285 * @return aligned instance of Seq 1
287 public SequenceI getAlignedSeq1()
289 SequenceI alSeq1 = new Sequence(s1.getName(), getAStr1());
290 alSeq1.setStart(s1.getStart() + getSeq1Start() - 1);
291 alSeq1.setEnd(s1.getStart() + getSeq1End() - 1);
292 alSeq1.setDatasetSequence(
293 s1.getDatasetSequence() == null ? s1 : s1.getDatasetSequence());
299 * @return aligned instance of Seq 2
301 public SequenceI getAlignedSeq2()
303 SequenceI alSeq2 = new Sequence(s2.getName(), getAStr2());
304 alSeq2.setStart(s2.getStart() + getSeq2Start() - 1);
305 alSeq2.setEnd(s2.getStart() + getSeq2End() - 1);
306 alSeq2.setDatasetSequence(
307 s2.getDatasetSequence() == null ? s2 : s2.getDatasetSequence());
312 * Construct score matrix for sequences with standard DNA or PEPTIDE matrix
317 * - string to use for s1
321 * - string to use for s2
325 public void seqInit(SequenceI s1, String string1, SequenceI s2,
326 String string2, String type)
330 setDefaultParams(type);
331 seqInit(string1, string2);
335 * construct score matrix for string1 and string2 (after removing any existing
341 private void seqInit(String string1, String string2)
343 s1str = extractGaps(jalview.util.Comparison.GapChars, string1);
344 s2str = extractGaps(jalview.util.Comparison.GapChars, string2);
346 if (s1str.length() == 0 || s2str.length() == 0)
349 "ALL GAPS: " + (s1str.length() == 0 ? s1.getName() : " ")
350 + (s2str.length() == 0 ? s2.getName() : ""));
354 score = new float[s1str.length()][s2str.length()];
356 E = new float[s1str.length()][s2str.length()];
358 F = new float[s1str.length()][s2str.length()];
359 traceback = new int[s1str.length()][s2str.length()];
361 seq1 = indexEncode(s1str);
363 seq2 = indexEncode(s2str);
366 private void setDefaultParams(String moleculeType)
368 if (!PEP.equals(moleculeType) && !DNA.equals(moleculeType))
370 output.append("Wrong type = dna or pep only");
371 throw new Error(MessageManager
372 .formatMessage("error.unknown_type_dna_or_pep", new String[]
377 scoreMatrix = ScoreModels.getInstance()
378 .getDefaultModel(PEP.equals(type));
385 public void traceAlignment()
387 // Find the maximum score along the rhs or bottom row
388 float max = -Float.MAX_VALUE;
390 for (int i = 0; i < seq1.length; i++)
392 if (score[i][seq2.length - 1] > max)
394 max = score[i][seq2.length - 1];
396 maxj = seq2.length - 1;
400 for (int j = 0; j < seq2.length; j++)
402 if (score[seq1.length - 1][j] > max)
404 max = score[seq1.length - 1][j];
405 maxi = seq1.length - 1;
413 //maxscore = score[i][j] / 10f;
414 maxscore = score[i][j];
416 //&! get trailing gaps
417 while ((i < seq1.length - 1) || (j < seq2.length - 1))
426 aseq1 = new int[seq1.length + seq2.length];
427 aseq2 = new int[seq1.length + seq2.length];
429 StringBuilder sb1 = new StringBuilder(aseq1.length);
430 StringBuilder sb2 = new StringBuilder(aseq2.length);
432 count = (seq1.length + seq2.length) - 1;
434 //&! get trailing gaps
435 while ((i >= seq1.length) || (j >= seq2.length))
437 if (i >= seq1.length)
439 aseq1[count] = GAP_INDEX;
441 aseq2[count] = seq2[j];
442 sb2.append(s2str.charAt(j));
443 } else if (j >= seq2.length) {
444 aseq1[count] = seq1[i];
445 sb1.append(s1str.charAt(i));
446 aseq2[count] = GAP_INDEX;
454 while (i > 0 && j > 0)
456 aseq1[count] = seq1[i];
457 sb1.append(s1str.charAt(i));
458 aseq2[count] = seq2[j];
459 sb2.append(s2str.charAt(j));
461 trace = findTrace(i, j);
471 aseq1[count] = GAP_INDEX;
472 sb1.replace(sb1.length() - 1, sb1.length(), "-");
474 else if (trace == -1)
477 aseq2[count] = GAP_INDEX;
478 sb2.replace(sb2.length() - 1, sb2.length(), "-");
487 if (aseq1[count] != GAP_INDEX)
489 aseq1[count] = seq1[i];
490 sb1.append(s1str.charAt(i));
493 if (aseq2[count] != GAP_INDEX)
495 aseq2[count] = seq2[j];
496 sb2.append(s2str.charAt(j));
499 //&! get initial gaps
500 while (j > 0 || i > 0)
505 sb2.append(s2str.charAt(j));
508 sb1.append(s1str.charAt(i));
515 * we built the character strings backwards, so now
516 * reverse them to convert to sequence strings
518 astr1 = sb1.reverse().toString();
519 astr2 = sb2.reverse().toString();
525 public void printAlignment(PrintStream os)
527 // TODO: Use original sequence characters rather than re-translated
528 // characters in output
529 // Find the biggest id length for formatting purposes
530 String s1id = getAlignedSeq1().getDisplayId(true);
531 String s2id = getAlignedSeq2().getDisplayId(true);
532 int nameLength = Math.max(s1id.length(), s2id.length());
533 if (nameLength > MAX_NAME_LENGTH)
535 int truncateBy = nameLength - MAX_NAME_LENGTH;
536 nameLength = MAX_NAME_LENGTH;
537 // JAL-527 - truncate the sequence ids
538 if (s1id.length() > nameLength)
540 int slashPos = s1id.lastIndexOf('/');
541 s1id = s1id.substring(0, slashPos - truncateBy)
542 + s1id.substring(slashPos);
544 if (s2id.length() > nameLength)
546 int slashPos = s2id.lastIndexOf('/');
547 s2id = s2id.substring(0, slashPos - truncateBy)
548 + s2id.substring(slashPos);
551 int len = 72 - nameLength - 1;
552 int nochunks = ((aseq1.length - count) / len)
553 + ((aseq1.length - count) % len > 0 ? 1 : 0);
556 output.append("Score = ").append(score[maxi][maxj]).append(NEWLINE);
557 output.append("Length of alignment = ")
558 .append(String.valueOf(aseq1.length - count)).append(NEWLINE);
559 output.append("Sequence ");
560 Format nameFormat = new Format("%" + nameLength + "s");
561 output.append(nameFormat.form(s1id));
562 output.append(" (Sequence length = ")
563 .append(String.valueOf(s1str.length())).append(")")
565 output.append("Sequence ");
566 output.append(nameFormat.form(s2id));
567 output.append(" (Sequence length = ")
568 .append(String.valueOf(s2str.length())).append(")")
569 .append(NEWLINE).append(NEWLINE);
571 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
573 for (int j = 0; j < nochunks; j++)
575 // Print the first aligned sequence
576 output.append(nameFormat.form(s1id)).append(" ");
578 for (int i = 0; i < len; i++)
580 if ((i + (j * len)) < astr1.length())
582 output.append(astr1.charAt(i + (j * len)));
586 output.append(NEWLINE);
587 output.append(nameFormat.form(" ")).append(" ");
590 * Print out the match symbols:
591 * | for exact match (ignoring case)
592 * . if PAM250 score is positive
595 for (int i = 0; i < len; i++)
597 if ((i + (j * len)) < astr1.length())
599 char c1 = astr1.charAt(i + (j * len));
600 char c2 = astr2.charAt(i + (j * len));
601 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
602 if (sameChar && !Comparison.isGap(c1))
607 else if (PEP.equals(type))
609 if (pam250.getPairwiseScore(c1, c2) > 0)
625 // Now print the second aligned sequence
626 output = output.append(NEWLINE);
627 output = output.append(nameFormat.form(s2id)).append(" ");
629 for (int i = 0; i < len; i++)
631 if ((i + (j * len)) < astr2.length())
633 output.append(astr2.charAt(i + (j * len)));
637 output.append(NEWLINE).append(NEWLINE);
640 pid = pid / (aseq1.length - count) * 100;
641 output.append(new Format("Percentage ID = %3.2f\n").form(pid));
642 output.append(NEWLINE);
645 os.print(output.toString());
646 } catch (Exception ex)
659 * @return DOCUMENT ME!
662 public int findTrace(int i, int j)
665 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
667 //float max = score[i - 1][j - 1] + (pairwiseScore * 10);
668 float max = score[i - 1][j - 1] + (pairwiseScore);
675 else if (F[i][j] == max)
689 else if (E[i][j] == max)
707 public void calcScoreMatrix()
712 // top left hand element
713 score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0),
715 //s2str.charAt(0)) * 10;
716 E[0][0] = -GAP_EXTEND_COST;
719 // Calculate the top row first
720 for (int j = 1; j < m; j++)
722 // What should these values be? 0 maybe
723 E[0][j] = max(score[0][j - 1] - GAP_OPEN_COST,
724 E[0][j - 1] - GAP_EXTEND_COST);
725 F[0][j] = -GAP_EXTEND_COST;
727 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0),
729 //score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST,
730 score[0][j] = max(pairwiseScore, -GAP_OPEN_COST,
736 // Now do the left hand column
737 for (int i = 1; i < n; i++)
739 E[i][0] = -GAP_OPEN_COST;
740 F[i][0] = max(score[i - 1][0] - GAP_OPEN_COST,
741 F[i - 1][0] - GAP_EXTEND_COST);
743 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
745 //score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
746 score[i][0] = max(pairwiseScore, E[i][0], F[i][0]);
747 traceback[i][0] = -1;
750 // Now do all the other rows
751 for (int i = 1; i < n; i++)
753 for (int j = 1; j < m; j++)
755 E[i][j] = max(score[i][j - 1] - GAP_OPEN_COST,
756 E[i][j - 1] - GAP_EXTEND_COST);
757 F[i][j] = max(score[i - 1][j] - GAP_OPEN_COST,
758 F[i - 1][j] - GAP_EXTEND_COST);
760 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
762 //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10),
763 score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore),
765 traceback[i][j] = findTrace(i, j);
771 * Returns the given sequence with all of the given gap characters removed.
774 * a string of characters to be treated as gaps
780 public static String extractGaps(String gapChars, String seq)
782 if (gapChars == null || seq == null)
786 StringTokenizer str = new StringTokenizer(seq, gapChars);
787 StringBuilder newString = new StringBuilder(seq.length());
789 while (str.hasMoreTokens())
791 newString.append(str.nextToken());
794 return newString.toString();
807 * @return DOCUMENT ME!
809 private static float max(float f1, float f2, float f3)
834 * @return DOCUMENT ME!
836 private static float max(float f1, float f2)
849 * Converts the character string to an array of integers which are the
850 * corresponding indices to the characters in the score matrix
856 int[] indexEncode(String s)
858 int[] encoded = new int[s.length()];
860 for (int i = 0; i < s.length(); i++)
862 char c = s.charAt(i);
863 encoded[i] = scoreMatrix.getMatrixIndex(c);
883 public static void displayMatrix(Graphics g, int[][] mat, int n, int m,
886 // TODO method doesn't seem to be referenced anywhere delete??
890 for (int i = 0; i < n; i++)
892 for (int j = 0; j < m; j++)
894 if (mat[i][j] >= max)
899 if (mat[i][j] <= min)
906 System.out.println(max + " " + min);
908 for (int i = 0; i < n; i++)
910 for (int j = 0; j < m; j++)
915 // System.out.println(mat[i][j]);
916 float score = (float) (mat[i][j] - min) / (float) (max - min);
917 g.setColor(new Color(score, 0, 0));
918 g.fillRect(x, y, psize, psize);
920 // System.out.println(x + " " + y + " " + score);
926 * Compute a globally optimal needleman and wunsch alignment between two
932 * AlignSeq.DNA or AlignSeq.PEP
934 public static AlignSeq doGlobalNWAlignment(SequenceI s1, SequenceI s2,
937 AlignSeq as = new AlignSeq(s1, s2, type);
939 as.calcScoreMatrix();
946 * @return mapping from positions in S1 to corresponding positions in S2
948 public jalview.datamodel.Mapping getMappingFromS1(boolean allowmismatch)
950 ArrayList<Integer> as1 = new ArrayList<Integer>(),
951 as2 = new ArrayList<Integer>();
952 int pdbpos = s2.getStart() + getSeq2Start() - 2;
953 int alignpos = s1.getStart() + getSeq1Start() - 2;
954 int lp2 = pdbpos - 3, lp1 = alignpos - 3;
955 boolean lastmatch = false;
956 // and now trace the alignment onto the atom set.
957 for (int i = 0; i < astr1.length(); i++)
959 char c1 = astr1.charAt(i), c2 = astr2.charAt(i);
970 if (allowmismatch || c1 == c2)
972 // extend mapping interval
973 if (lp1 + 1 != alignpos || lp2 + 1 != pdbpos)
975 as1.add(Integer.valueOf(alignpos));
976 as2.add(Integer.valueOf(pdbpos));
984 // extend mapping interval
987 as1.add(Integer.valueOf(lp1));
988 as2.add(Integer.valueOf(lp2));
993 // construct range pairs
995 int[] mapseq1 = new int[as1.size() + (lastmatch ? 1 : 0)],
996 mapseq2 = new int[as2.size() + (lastmatch ? 1 : 0)];
998 for (Integer ip : as1)
1004 for (Integer ip : as2)
1011 mapseq1[mapseq1.length - 1] = alignpos;
1012 mapseq2[mapseq2.length - 1] = pdbpos;
1014 MapList map = new MapList(mapseq1, mapseq2, 1, 1);
1016 jalview.datamodel.Mapping mapping = new Mapping(map);
1022 * matches ochains against al and populates seqs with the best match between
1023 * each ochain and the set in al
1027 * @param dnaOrProtein
1028 * @param removeOldAnnots
1029 * when true, old annotation is cleared before new annotation
1031 * @return List<List<SequenceI> originals, List<SequenceI> replacement,
1032 * List<AlignSeq> alignment between each>
1034 public static List<List<? extends Object>> replaceMatchingSeqsWith(
1035 List<SequenceI> seqs, List<AlignmentAnnotation> annotations,
1036 List<SequenceI> ochains, AlignmentI al, String dnaOrProtein,
1037 boolean removeOldAnnots)
1039 List<SequenceI> orig = new ArrayList<SequenceI>(),
1040 repl = new ArrayList<SequenceI>();
1041 List<AlignSeq> aligs = new ArrayList<AlignSeq>();
1042 if (al != null && al.getHeight() > 0)
1044 ArrayList<SequenceI> matches = new ArrayList<SequenceI>();
1045 ArrayList<AlignSeq> aligns = new ArrayList<AlignSeq>();
1047 for (SequenceI sq : ochains)
1049 SequenceI bestm = null;
1050 AlignSeq bestaseq = null;
1051 float bestscore = 0;
1052 for (SequenceI msq : al.getSequences())
1054 AlignSeq aseq = doGlobalNWAlignment(msq, sq, dnaOrProtein);
1055 if (bestm == null || aseq.getMaxScore() > bestscore)
1057 bestscore = aseq.getMaxScore();
1062 // System.out.println("Best Score for " + (matches.size() + 1) + " :"
1065 aligns.add(bestaseq);
1066 al.deleteSequence(bestm);
1068 for (int p = 0, pSize = seqs.size(); p < pSize; p++)
1070 SequenceI sq, sp = seqs.get(p);
1072 if ((q = ochains.indexOf(sp)) > -1)
1074 seqs.set(p, sq = matches.get(q));
1077 sq.setName(sp.getName());
1078 sq.setDescription(sp.getDescription());
1080 sq.transferAnnotation(sp,
1081 sp2sq = aligns.get(q).getMappingFromS1(false));
1082 aligs.add(aligns.get(q));
1084 for (int ap = 0; ap < annotations.size();)
1086 if (annotations.get(ap).sequenceRef == sp)
1092 if (removeOldAnnots)
1094 annotations.remove(ap);
1098 AlignmentAnnotation alan = annotations.remove(ap);
1099 alan.liftOver(sq, sp2sq);
1100 alan.setSequenceRef(sq);
1101 sq.addAlignmentAnnotation(alan);
1109 if (sq.getAnnotation() != null && sq.getAnnotation().length > 0)
1111 annotations.addAll(inspos == -1 ? annotations.size() : inspos,
1112 Arrays.asList(sq.getAnnotation()));
1117 return Arrays.asList(orig, repl, aligs);
1121 * compute the PID vector used by the redundancy filter.
1123 * @param originalSequences
1124 * - sequences in alignment that are to filtered
1126 * - null or strings to be analysed (typically, visible portion of
1127 * each sequence in alignment)
1129 * - first column in window for calculation
1131 * - last column in window for calculation
1133 * - if true then use ungapped sequence to compute PID
1134 * @return vector containing maximum PID for i-th sequence and any sequences
1135 * longer than that seuqence
1137 public static float[] computeRedundancyMatrix(
1138 SequenceI[] originalSequences, String[] omitHidden, int start,
1139 int end, boolean ungapped)
1141 int height = originalSequences.length;
1142 float[] redundancy = new float[height];
1143 int[] lngth = new int[height];
1144 for (int i = 0; i < height; i++)
1150 // long start = System.currentTimeMillis();
1152 SimilarityParams pidParams = new SimilarityParams(true, true, true,
1156 for (int i = 0; i < height; i++)
1159 for (int j = 0; j < i; j++)
1166 if (omitHidden == null)
1168 seqi = originalSequences[i].getSequenceAsString(start, end);
1169 seqj = originalSequences[j].getSequenceAsString(start, end);
1173 seqi = omitHidden[i];
1174 seqj = omitHidden[j];
1178 String ug = AlignSeq.extractGaps(Comparison.GapChars, seqi);
1179 lngth[i] = ug.length();
1187 String ug = AlignSeq.extractGaps(Comparison.GapChars, seqj);
1188 lngth[j] = ug.length();
1194 pid = (float) PIDModel.computePID(seqi, seqj, pidParams);
1196 // use real sequence length rather than string length
1197 if (lngth[j] < lngth[i])
1199 redundancy[j] = Math.max(pid, redundancy[j]);
1203 redundancy[i] = Math.max(pid, redundancy[i]);
1212 * calculate the mean score of the alignment
1213 * mean score is equal to the score of an alignmenet of two sequences with randomly shuffled AA sequence composited of the same AA as the two original sequences
1216 public void meanScore()
1218 int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length();
1219 HashMap<Character, Integer> seq1ResCount = new HashMap<Character, Integer>();
1220 HashMap<Character, Integer> seq2ResCount = new HashMap<Character, Integer>();
1222 for (char residue: s1str.toCharArray())
1224 seq1ResCount.putIfAbsent(residue, 0);
1225 seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1);
1227 for (char residue: s2str.toCharArray())
1229 seq2ResCount.putIfAbsent(residue, 0);
1230 seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1);
1233 float _meanscore = 0;
1234 for (char resA : seq1ResCount.keySet())
1236 for (char resB : seq2ResCount.keySet())
1238 int countA = seq1ResCount.get(resA);
1239 int countB = seq2ResCount.get(resB);
1241 float scoreAB = scoreMatrix.getPairwiseScore(resA, resB);
1243 _meanscore += countA * countB * scoreAB;
1246 _meanscore /= length;
1247 this.meanScore = _meanscore;
1250 public float getMeanScore()
1252 return this.meanScore;
1256 * calculate the hypothetic max score using the self-alignment of the sequences
1258 public void hypotheticMaxScore()
1262 for (char residue: s1str.toCharArray())
1264 _hmsA += scoreMatrix.getPairwiseScore(residue, residue);
1266 for (char residue: s2str.toCharArray())
1268 _hmsB += scoreMatrix.getPairwiseScore(residue, residue);
1270 this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB;
1273 public float getHypotheticMaxScore()
1275 return this.hypotheticMaxScore;
1279 * create strings based of astr1 and astr2 but without gaps
1281 public void getIndelfreeAstr()
1283 int n = astr1.length(); // both have the same length
1284 for (int i = 0; i < n; i++)
1286 if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i)))
1288 this.indelfreeAstr1 += astr1.charAt(i);
1289 this.indelfreeAstr2 += astr2.charAt(i);
1295 * calculates the overall score of the alignment
1296 * alignmentScore = sum of all scores - all penalties
1299 public void scoreAlignment() throws RuntimeException
1303 hypotheticMaxScore();
1304 // cannot calculate score because denominator would be zero
1305 if (this.hypotheticMaxScore == this.meanScore)
1307 throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore));
1309 int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length();
1311 System.out.println(String.format("astr1: %c .. %c ; astr2: %c .. %c", astr1.toCharArray()[0], astr1.toCharArray()[astr1.length() - 1], astr2.toCharArray()[0], astr2.toCharArray()[astr2.length() - 1]));
1314 boolean aGapOpen = false;
1315 boolean bGapOpen = false;
1316 for (int i = 0; i < n; i++)
1318 char char1 = astr1.charAt(i);
1319 char char2 = astr2.charAt(i);
1320 boolean aIsLetter = Character.isLetter(char1);
1321 boolean bIsLetter = Character.isLetter(char2);
1322 if (aIsLetter && bIsLetter) // if pair -> get score
1324 score += scoreMatrix.getPairwiseScore(char1, char2);
1325 } else if (!aIsLetter && !bIsLetter) { // both are gap -> skip
1326 } else if ((!aIsLetter && aGapOpen) || (!bIsLetter && bGapOpen)) { // one side gapopen -> score - gap_extend
1327 score -= GAP_EXTEND_COST;
1328 } else { // no gap open -> score - gap_open
1329 score -= GAP_OPEN_COST;
1331 aGapOpen = (!aIsLetter) ? true : false;
1332 bGapOpen = (!bIsLetter) ? true : false;
1335 float preprescore = score;
1336 score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore);
1337 int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max}
1338 float coverage = (float) indelfreeAstr1.length() / (float) _max[1];
1339 float prescore = score;
1342 System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %f", preprescore, prescore, indelfreeAstr1.length(), score, this.meanScore, this.hypotheticMaxScore));
1343 this.alignmentScore = (preprescore < 1) ? Float.NaN : score;