2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import java.util.Locale;
25 import jalview.analysis.scoremodels.PIDModel;
26 import jalview.analysis.scoremodels.ScoreMatrix;
27 import jalview.analysis.scoremodels.ScoreModels;
28 import jalview.analysis.scoremodels.SimilarityParams;
29 import jalview.datamodel.AlignmentAnnotation;
30 import jalview.datamodel.AlignmentI;
31 import jalview.datamodel.Mapping;
32 import jalview.datamodel.Sequence;
33 import jalview.datamodel.SequenceI;
34 import jalview.math.MiscMath;
35 import jalview.util.Comparison;
36 import jalview.util.Format;
37 import jalview.util.MapList;
38 import jalview.util.MessageManager;
40 import java.awt.Color;
41 import java.awt.Graphics;
42 import java.io.PrintStream;
43 import java.lang.IllegalArgumentException;
44 import java.util.ArrayList;
45 import java.util.Arrays;
46 import java.util.HashMap;
47 import java.util.List;
48 import java.util.StringTokenizer;
58 private static final int MAX_NAME_LENGTH = 30;
61 private static final int GAP_OPEN_COST = 120;
62 //private static final int GAP_OPEN_COST = 100;
64 private static final int GAP_EXTEND_COST = 20;
65 //private static final int GAP_EXTEND_COST = 5;
67 private static final int GAP_INDEX = -1;
69 public static final String PEP = "pep";
71 public static final String DNA = "dna";
73 private static final String NEWLINE = System.lineSeparator();
83 int[][] traceback; // todo is this actually used?
105 public String astr1 = "";
107 public String astr2 = "";
109 public String indelfreeAstr1 = "";
111 public String indelfreeAstr2 = "";
114 public int seq1start;
120 public int seq2start;
126 public float maxscore;
128 public float meanScore; //needed for PaSiMap
130 public int hypotheticMaxScore; // needed for PaSiMap
134 StringBuffer output = new StringBuffer();
136 String type; // AlignSeq.PEP or AlignSeq.DNA
138 private ScoreMatrix scoreMatrix;
141 * Creates a new AlignSeq object.
144 * first sequence for alignment
146 * second sequence for alignment
148 * molecule type, either AlignSeq.PEP or AlignSeq.DNA
150 public AlignSeq(SequenceI s1, SequenceI s2, String type)
152 seqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(),
157 * Creates a new AlignSeq object.
166 public AlignSeq(SequenceI s1, String string1, SequenceI s2,
167 String string2, String type)
169 seqInit(s1, string1.toUpperCase(Locale.ROOT), s2,
170 string2.toUpperCase(Locale.ROOT), type);
176 * @return DOCUMENT ME!
178 public float getMaxScore()
184 * returns the overall score of the alignment
188 public float getAlignmentScore()
190 return alignmentScore;
196 * @return DOCUMENT ME!
198 public int getSeq2Start()
206 * @return DOCUMENT ME!
208 public int getSeq2End()
216 * @return DOCUMENT ME!
218 public int getSeq1Start()
226 * @return DOCUMENT ME!
228 public int getSeq1End()
236 * @return DOCUMENT ME!
238 public String getOutput()
240 return output.toString();
246 * @return DOCUMENT ME!
248 public String getAStr1()
256 * @return DOCUMENT ME!
258 public String getAStr2()
266 * @return DOCUMENT ME!
268 public int[] getASeq1()
276 * @return DOCUMENT ME!
278 public int[] getASeq2()
285 * @return aligned instance of Seq 1
287 public SequenceI getAlignedSeq1()
289 SequenceI alSeq1 = new Sequence(s1.getName(), getAStr1());
290 alSeq1.setStart(s1.getStart() + getSeq1Start() - 1);
291 alSeq1.setEnd(s1.getStart() + getSeq1End() - 1);
292 alSeq1.setDatasetSequence(
293 s1.getDatasetSequence() == null ? s1 : s1.getDatasetSequence());
299 * @return aligned instance of Seq 2
301 public SequenceI getAlignedSeq2()
303 SequenceI alSeq2 = new Sequence(s2.getName(), getAStr2());
304 alSeq2.setStart(s2.getStart() + getSeq2Start() - 1);
305 alSeq2.setEnd(s2.getStart() + getSeq2End() - 1);
306 alSeq2.setDatasetSequence(
307 s2.getDatasetSequence() == null ? s2 : s2.getDatasetSequence());
312 * Construct score matrix for sequences with standard DNA or PEPTIDE matrix
317 * - string to use for s1
321 * - string to use for s2
325 public void seqInit(SequenceI s1, String string1, SequenceI s2,
326 String string2, String type)
330 setDefaultParams(type);
331 seqInit(string1, string2);
335 * construct score matrix for string1 and string2 (after removing any existing
341 private void seqInit(String string1, String string2)
343 s1str = extractGaps(jalview.util.Comparison.GapChars, string1);
344 s2str = extractGaps(jalview.util.Comparison.GapChars, string2);
346 if (s1str.length() == 0 || s2str.length() == 0)
349 "ALL GAPS: " + (s1str.length() == 0 ? s1.getName() : " ")
350 + (s2str.length() == 0 ? s2.getName() : ""));
354 score = new float[s1str.length()][s2str.length()];
356 E = new float[s1str.length()][s2str.length()];
358 F = new float[s1str.length()][s2str.length()];
359 traceback = new int[s1str.length()][s2str.length()];
361 seq1 = indexEncode(s1str);
363 seq2 = indexEncode(s2str);
366 private void setDefaultParams(String moleculeType)
368 if (!PEP.equals(moleculeType) && !DNA.equals(moleculeType))
370 output.append("Wrong type = dna or pep only");
371 throw new Error(MessageManager
372 .formatMessage("error.unknown_type_dna_or_pep", new String[]
377 scoreMatrix = ScoreModels.getInstance()
378 .getDefaultModel(PEP.equals(type));
385 public void traceAlignment()
387 // Find the maximum score along the rhs or bottom row
388 float max = -Float.MAX_VALUE;
390 for (int i = 0; i < seq1.length; i++)
392 if (score[i][seq2.length - 1] > max)
394 max = score[i][seq2.length - 1];
396 maxj = seq2.length - 1;
400 for (int j = 0; j < seq2.length; j++)
402 if (score[seq1.length - 1][j] > max)
404 max = score[seq1.length - 1][j];
405 maxi = seq1.length - 1;
413 maxscore = score[i][j] / 10f;
414 //maxscore = score[i][j];
416 //&! get trailing gaps
417 while ((i < seq1.length - 1) || (j < seq2.length - 1))
426 aseq1 = new int[seq1.length + seq2.length];
427 aseq2 = new int[seq1.length + seq2.length];
429 StringBuilder sb1 = new StringBuilder(aseq1.length);
430 StringBuilder sb2 = new StringBuilder(aseq2.length);
432 count = (seq1.length + seq2.length) - 1;
434 //&! get trailing gaps
435 while ((i >= seq1.length) || (j >= seq2.length))
437 if (i >= seq1.length)
439 aseq1[count] = GAP_INDEX;
441 aseq2[count] = seq2[j];
442 sb2.append(s2str.charAt(j));
443 } else if (j >= seq2.length) {
444 aseq1[count] = seq1[i];
445 sb1.append(s1str.charAt(i));
446 aseq2[count] = GAP_INDEX;
454 while (i > 0 && j > 0)
456 aseq1[count] = seq1[i];
457 sb1.append(s1str.charAt(i));
458 aseq2[count] = seq2[j];
459 sb2.append(s2str.charAt(j));
461 trace = findTrace(i, j);
471 aseq1[count] = GAP_INDEX;
472 sb1.replace(sb1.length() - 1, sb1.length(), "-");
474 else if (trace == -1)
477 aseq2[count] = GAP_INDEX;
478 sb2.replace(sb2.length() - 1, sb2.length(), "-");
487 if (aseq1[count] != GAP_INDEX)
489 aseq1[count] = seq1[i];
490 sb1.append(s1str.charAt(i));
493 if (aseq2[count] != GAP_INDEX)
495 aseq2[count] = seq2[j];
496 sb2.append(s2str.charAt(j));
499 //&! get initial gaps
500 while (j > 0 || i > 0)
505 sb2.append(s2str.charAt(j));
508 sb1.append(s1str.charAt(i));
515 * we built the character strings backwards, so now
516 * reverse them to convert to sequence strings
518 astr1 = sb1.reverse().toString();
519 astr2 = sb2.reverse().toString();
525 public void printAlignment(PrintStream os)
527 // TODO: Use original sequence characters rather than re-translated
528 // characters in output
529 // Find the biggest id length for formatting purposes
530 String s1id = getAlignedSeq1().getDisplayId(true);
531 String s2id = getAlignedSeq2().getDisplayId(true);
532 int nameLength = Math.max(s1id.length(), s2id.length());
533 if (nameLength > MAX_NAME_LENGTH)
535 int truncateBy = nameLength - MAX_NAME_LENGTH;
536 nameLength = MAX_NAME_LENGTH;
537 // JAL-527 - truncate the sequence ids
538 if (s1id.length() > nameLength)
540 int slashPos = s1id.lastIndexOf('/');
541 s1id = s1id.substring(0, slashPos - truncateBy)
542 + s1id.substring(slashPos);
544 if (s2id.length() > nameLength)
546 int slashPos = s2id.lastIndexOf('/');
547 s2id = s2id.substring(0, slashPos - truncateBy)
548 + s2id.substring(slashPos);
551 int len = 72 - nameLength - 1;
552 int nochunks = ((aseq1.length - count) / len)
553 + ((aseq1.length - count) % len > 0 ? 1 : 0);
556 output.append("Score = ").append(score[maxi][maxj]).append(NEWLINE);
557 output.append("Length of alignment = ")
558 .append(String.valueOf(aseq1.length - count)).append(NEWLINE);
559 output.append("Sequence ");
560 Format nameFormat = new Format("%" + nameLength + "s");
561 output.append(nameFormat.form(s1id));
562 output.append(" (Sequence length = ")
563 .append(String.valueOf(s1str.length())).append(")")
565 output.append("Sequence ");
566 output.append(nameFormat.form(s2id));
567 output.append(" (Sequence length = ")
568 .append(String.valueOf(s2str.length())).append(")")
569 .append(NEWLINE).append(NEWLINE);
571 ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
573 for (int j = 0; j < nochunks; j++)
575 // Print the first aligned sequence
576 output.append(nameFormat.form(s1id)).append(" ");
578 for (int i = 0; i < len; i++)
580 if ((i + (j * len)) < astr1.length())
582 output.append(astr1.charAt(i + (j * len)));
586 output.append(NEWLINE);
587 output.append(nameFormat.form(" ")).append(" ");
590 * Print out the match symbols:
591 * | for exact match (ignoring case)
592 * . if PAM250 score is positive
595 for (int i = 0; i < len; i++)
597 if ((i + (j * len)) < astr1.length())
599 char c1 = astr1.charAt(i + (j * len));
600 char c2 = astr2.charAt(i + (j * len));
601 boolean sameChar = Comparison.isSameResidue(c1, c2, false);
602 if (sameChar && !Comparison.isGap(c1))
607 else if (PEP.equals(type))
609 if (pam250.getPairwiseScore(c1, c2) > 0)
625 // Now print the second aligned sequence
626 output = output.append(NEWLINE);
627 output = output.append(nameFormat.form(s2id)).append(" ");
629 for (int i = 0; i < len; i++)
631 if ((i + (j * len)) < astr2.length())
633 output.append(astr2.charAt(i + (j * len)));
637 output.append(NEWLINE).append(NEWLINE);
640 pid = pid / (aseq1.length - count) * 100;
641 output.append(new Format("Percentage ID = %3.2f\n").form(pid));
642 output.append(NEWLINE);
645 os.print(output.toString());
646 } catch (Exception ex)
659 * @return DOCUMENT ME!
662 public int findTrace(int i, int j)
665 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
667 float max = score[i - 1][j - 1] + (pairwiseScore * 10);
668 //float max = score[i - 1][j - 1] + (pairwiseScore);
675 else if (F[i][j] == max)
689 else if (E[i][j] == max)
707 public void calcScoreMatrix()
712 // top left hand element
713 score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0),
715 s2str.charAt(0)) * 10;
716 E[0][0] = -GAP_EXTEND_COST;
719 // Calculate the top row first
720 for (int j = 1; j < m; j++)
722 // What should these values be? 0 maybe
723 E[0][j] = max(score[0][j - 1] - GAP_OPEN_COST,
724 E[0][j - 1] - GAP_EXTEND_COST);
725 F[0][j] = -GAP_EXTEND_COST;
727 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0),
729 score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST,
730 //score[0][j] = max(pairwiseScore, -GAP_OPEN_COST,
736 // Now do the left hand column
737 for (int i = 1; i < n; i++)
739 E[i][0] = -GAP_OPEN_COST;
740 F[i][0] = max(score[i - 1][0] - GAP_OPEN_COST,
741 F[i - 1][0] - GAP_EXTEND_COST);
743 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
745 score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
746 //score[i][0] = max(pairwiseScore, E[i][0], F[i][0]);
747 traceback[i][0] = -1;
750 // Now do all the other rows
751 for (int i = 1; i < n; i++)
753 for (int j = 1; j < m; j++)
755 E[i][j] = max(score[i][j - 1] - GAP_OPEN_COST,
756 E[i][j - 1] - GAP_EXTEND_COST);
757 F[i][j] = max(score[i - 1][j] - GAP_OPEN_COST,
758 F[i - 1][j] - GAP_EXTEND_COST);
760 float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
762 score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10),
763 //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore),
765 traceback[i][j] = findTrace(i, j);
771 * Returns the given sequence with all of the given gap characters removed.
774 * a string of characters to be treated as gaps
780 public static String extractGaps(String gapChars, String seq)
782 if (gapChars == null || seq == null)
786 StringTokenizer str = new StringTokenizer(seq, gapChars);
787 StringBuilder newString = new StringBuilder(seq.length());
789 while (str.hasMoreTokens())
791 newString.append(str.nextToken());
794 return newString.toString();
807 * @return DOCUMENT ME!
809 private static float max(float f1, float f2, float f3)
834 * @return DOCUMENT ME!
836 private static float max(float f1, float f2)
849 * Converts the character string to an array of integers which are the
850 * corresponding indices to the characters in the score matrix
856 int[] indexEncode(String s)
858 int[] encoded = new int[s.length()];
860 for (int i = 0; i < s.length(); i++)
862 char c = s.charAt(i);
863 encoded[i] = scoreMatrix.getMatrixIndex(c);
883 public static void displayMatrix(Graphics g, int[][] mat, int n, int m,
886 // TODO method doesn't seem to be referenced anywhere delete??
890 for (int i = 0; i < n; i++)
892 for (int j = 0; j < m; j++)
894 if (mat[i][j] >= max)
899 if (mat[i][j] <= min)
906 System.out.println(max + " " + min);
908 for (int i = 0; i < n; i++)
910 for (int j = 0; j < m; j++)
915 // System.out.println(mat[i][j]);
916 float score = (float) (mat[i][j] - min) / (float) (max - min);
917 g.setColor(new Color(score, 0, 0));
918 g.fillRect(x, y, psize, psize);
920 // System.out.println(x + " " + y + " " + score);
926 * Compute a globally optimal needleman and wunsch alignment between two
932 * AlignSeq.DNA or AlignSeq.PEP
934 public static AlignSeq doGlobalNWAlignment(SequenceI s1, SequenceI s2,
937 AlignSeq as = new AlignSeq(s1, s2, type);
939 as.calcScoreMatrix();
946 * @return mapping from positions in S1 to corresponding positions in S2
948 public jalview.datamodel.Mapping getMappingFromS1(boolean allowmismatch)
950 ArrayList<Integer> as1 = new ArrayList<Integer>(),
951 as2 = new ArrayList<Integer>();
952 int pdbpos = s2.getStart() + getSeq2Start() - 2;
953 int alignpos = s1.getStart() + getSeq1Start() - 2;
954 int lp2 = pdbpos - 3, lp1 = alignpos - 3;
955 boolean lastmatch = false;
956 // and now trace the alignment onto the atom set.
957 for (int i = 0; i < astr1.length(); i++)
959 char c1 = astr1.charAt(i), c2 = astr2.charAt(i);
970 // ignore case differences
971 if (allowmismatch || (c1 == c2) || (Math.abs(c2-c1)==('a'-'A')))
973 // extend mapping interval
974 if (lp1 + 1 != alignpos || lp2 + 1 != pdbpos)
976 as1.add(Integer.valueOf(alignpos));
977 as2.add(Integer.valueOf(pdbpos));
985 // extend mapping interval
988 as1.add(Integer.valueOf(lp1));
989 as2.add(Integer.valueOf(lp2));
994 // construct range pairs
996 int[] mapseq1 = new int[as1.size() + (lastmatch ? 1 : 0)],
997 mapseq2 = new int[as2.size() + (lastmatch ? 1 : 0)];
999 for (Integer ip : as1)
1005 for (Integer ip : as2)
1012 mapseq1[mapseq1.length - 1] = alignpos;
1013 mapseq2[mapseq2.length - 1] = pdbpos;
1015 MapList map = new MapList(mapseq1, mapseq2, 1, 1);
1017 jalview.datamodel.Mapping mapping = new Mapping(map);
1023 * matches ochains against al and populates seqs with the best match between
1024 * each ochain and the set in al
1028 * @param dnaOrProtein
1029 * @param removeOldAnnots
1030 * when true, old annotation is cleared before new annotation
1032 * @return List<List<SequenceI> originals, List<SequenceI> replacement,
1033 * List<AlignSeq> alignment between each>
1035 public static List<List<? extends Object>> replaceMatchingSeqsWith(
1036 List<SequenceI> seqs, List<AlignmentAnnotation> annotations,
1037 List<SequenceI> ochains, AlignmentI al, String dnaOrProtein,
1038 boolean removeOldAnnots)
1040 List<SequenceI> orig = new ArrayList<SequenceI>(),
1041 repl = new ArrayList<SequenceI>();
1042 List<AlignSeq> aligs = new ArrayList<AlignSeq>();
1043 if (al != null && al.getHeight() > 0)
1045 ArrayList<SequenceI> matches = new ArrayList<SequenceI>();
1046 ArrayList<AlignSeq> aligns = new ArrayList<AlignSeq>();
1048 for (SequenceI sq : ochains)
1050 SequenceI bestm = null;
1051 AlignSeq bestaseq = null;
1052 float bestscore = 0;
1053 for (SequenceI msq : al.getSequences())
1055 AlignSeq aseq = doGlobalNWAlignment(msq, sq, dnaOrProtein);
1056 if (bestm == null || aseq.getMaxScore() > bestscore)
1058 bestscore = aseq.getMaxScore();
1063 // System.out.println("Best Score for " + (matches.size() + 1) + " :"
1066 aligns.add(bestaseq);
1067 al.deleteSequence(bestm);
1069 for (int p = 0, pSize = seqs.size(); p < pSize; p++)
1071 SequenceI sq, sp = seqs.get(p);
1073 if ((q = ochains.indexOf(sp)) > -1)
1075 seqs.set(p, sq = matches.get(q));
1078 sq.setName(sp.getName());
1079 sq.setDescription(sp.getDescription());
1081 sq.transferAnnotation(sp,
1082 sp2sq = aligns.get(q).getMappingFromS1(false));
1083 aligs.add(aligns.get(q));
1085 for (int ap = 0; ap < annotations.size();)
1087 if (annotations.get(ap).sequenceRef == sp)
1093 if (removeOldAnnots)
1095 annotations.remove(ap);
1099 AlignmentAnnotation alan = annotations.remove(ap);
1100 alan.liftOver(sq, sp2sq);
1101 alan.setSequenceRef(sq);
1102 sq.addAlignmentAnnotation(alan);
1110 if (sq.getAnnotation() != null && sq.getAnnotation().length > 0)
1112 annotations.addAll(inspos == -1 ? annotations.size() : inspos,
1113 Arrays.asList(sq.getAnnotation()));
1118 return Arrays.asList(orig, repl, aligs);
1122 * compute the PID vector used by the redundancy filter.
1124 * @param originalSequences
1125 * - sequences in alignment that are to filtered
1127 * - null or strings to be analysed (typically, visible portion of
1128 * each sequence in alignment)
1130 * - first column in window for calculation
1132 * - last column in window for calculation
1134 * - if true then use ungapped sequence to compute PID
1135 * @return vector containing maximum PID for i-th sequence and any sequences
1136 * longer than that seuqence
1138 public static float[] computeRedundancyMatrix(
1139 SequenceI[] originalSequences, String[] omitHidden, int start,
1140 int end, boolean ungapped)
1142 int height = originalSequences.length;
1143 float[] redundancy = new float[height];
1144 int[] lngth = new int[height];
1145 for (int i = 0; i < height; i++)
1151 // long start = System.currentTimeMillis();
1153 SimilarityParams pidParams = new SimilarityParams(true, true, true,
1157 for (int i = 0; i < height; i++)
1160 for (int j = 0; j < i; j++)
1167 if (omitHidden == null)
1169 seqi = originalSequences[i].getSequenceAsString(start, end);
1170 seqj = originalSequences[j].getSequenceAsString(start, end);
1174 seqi = omitHidden[i];
1175 seqj = omitHidden[j];
1179 String ug = AlignSeq.extractGaps(Comparison.GapChars, seqi);
1180 lngth[i] = ug.length();
1188 String ug = AlignSeq.extractGaps(Comparison.GapChars, seqj);
1189 lngth[j] = ug.length();
1195 pid = (float) PIDModel.computePID(seqi, seqj, pidParams);
1197 // use real sequence length rather than string length
1198 if (lngth[j] < lngth[i])
1200 redundancy[j] = Math.max(pid, redundancy[j]);
1204 redundancy[i] = Math.max(pid, redundancy[i]);
1213 * calculate the mean score of the alignment
1214 * mean score is equal to the score of an alignmenet of two sequences with randomly shuffled AA sequence composited of the same AA as the two original sequences
1217 public void meanScore()
1219 //int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length();
1220 int length = indelfreeAstr1.length(); //both have the same length
1221 //create HashMap for counting residues in each sequence
1222 HashMap<Character, Integer> seq1ResCount = new HashMap<Character, Integer>();
1223 HashMap<Character, Integer> seq2ResCount = new HashMap<Character, Integer>();
1225 // for both sequences (String indelfreeAstr1 or 2) create a key for the residue and add 1 each time its encountered
1226 for (char residue: indelfreeAstr1.toCharArray())
1228 seq1ResCount.putIfAbsent(residue, 0);
1229 seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1);
1231 for (char residue: indelfreeAstr2.toCharArray())
1233 seq2ResCount.putIfAbsent(residue, 0);
1234 seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1);
1237 // meanscore = for each residue pair get the number of appearance and add (countA * countB * pairwiseScore(AB))
1238 // divide the meanscore by the sequence length afterwards
1239 float _meanscore = 0;
1240 for (char resA : seq1ResCount.keySet())
1242 for (char resB : seq2ResCount.keySet())
1244 int countA = seq1ResCount.get(resA);
1245 int countB = seq2ResCount.get(resB);
1247 float scoreAB = scoreMatrix.getPairwiseScore(resA, resB);
1249 _meanscore += countA * countB * scoreAB;
1252 _meanscore /= length;
1253 this.meanScore = _meanscore;
1256 public float getMeanScore()
1258 return this.meanScore;
1262 * calculate the hypothetic max score using the self-alignment of the sequences
1264 public void hypotheticMaxScore()
1268 for (char residue: indelfreeAstr1.toCharArray())
1270 _hmsA += scoreMatrix.getPairwiseScore(residue, residue);
1272 for (char residue: indelfreeAstr2.toCharArray())
1274 _hmsB += scoreMatrix.getPairwiseScore(residue, residue);
1276 this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB; // take the lower self alignment
1280 public int getHypotheticMaxScore()
1282 return this.hypotheticMaxScore;
1286 * create strings based of astr1 and astr2 but without gaps
1288 public void getIndelfreeAstr()
1290 int n = astr1.length(); // both have the same length
1291 for (int i = 0; i < n; i++)
1293 if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i))) // if both sequences dont have a gap -> add to indelfreeAstr
1295 this.indelfreeAstr1 += astr1.charAt(i);
1296 this.indelfreeAstr2 += astr2.charAt(i);
1302 * calculates the overall score of the alignment
1303 * preprescore = sum of all scores - all penalties
1304 * if preprescore < 1 ~ alignmentScore = Float.NaN >
1305 * alignmentScore = ((preprescore - meanScore) / (hypotheticMaxScore - meanScore)) * coverage
1307 public void scoreAlignment() throws RuntimeException
1312 hypotheticMaxScore();
1313 // cannot calculate score because denominator would be zero
1314 if (this.hypotheticMaxScore == this.meanScore)
1316 throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore));
1318 //int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length();
1319 int n = indelfreeAstr1.length();
1322 boolean aGapOpen = false;
1323 boolean bGapOpen = false;
1324 for (int i = 0; i < n; i++)
1326 char char1 = indelfreeAstr1.charAt(i);
1327 char char2 = indelfreeAstr2.charAt(i);
1328 boolean aIsLetter = Character.isLetter(char1);
1329 boolean bIsLetter = Character.isLetter(char2);
1330 if (aIsLetter && bIsLetter) // if pair -> get score
1332 score += scoreMatrix.getPairwiseScore(char1, char2);
1333 } else if (!aIsLetter && !bIsLetter) { // both are gap -> skip
1334 } else if ((!aIsLetter && aGapOpen) || (!bIsLetter && bGapOpen)) { // one side gapopen -> score - gap_extend
1335 score -= GAP_EXTEND_COST;
1336 } else { // no gap open -> score - gap_open
1337 score -= GAP_OPEN_COST;
1339 // adjust GapOpen status in both sequences
1340 aGapOpen = (!aIsLetter) ? true : false;
1341 bGapOpen = (!bIsLetter) ? true : false;
1344 float preprescore = score; // if this score < 1 --> alignment score = Float.NaN
1345 score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore);
1346 int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max}
1347 float coverage = (float) n / (float) _max[1]; // indelfreeAstr length / longest sequence length
1348 float prescore = score; // only debug
1351 System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %d", preprescore, prescore, _max[1], score, this.meanScore, this.hypotheticMaxScore));
1352 this.alignmentScore = (preprescore < 1) ? Float.NaN : score;