X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignSeq.java;h=4ec0457c2980a7977245355c48bfc8d2cf1410cc;hb=6c36212c1e16557e6afb1b4dba9c28864d52ad4a;hp=65fd1100b8e8a7f2cdc33f1d1a90f80aaa3076bf;hpb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;p=jalview.git diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 65fd110..4ec0457 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -31,6 +31,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import jalview.math.MiscMath; import jalview.util.Comparison; import jalview.util.Format; import jalview.util.MapList; @@ -39,8 +40,10 @@ import jalview.util.MessageManager; import java.awt.Color; import java.awt.Graphics; import java.io.PrintStream; +import java.lang.IllegalArgumentException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.StringTokenizer; @@ -54,9 +57,12 @@ public class AlignSeq { private static final int MAX_NAME_LENGTH = 30; + //&! private static final int GAP_OPEN_COST = 120; + //private static final int GAP_OPEN_COST = 100; private static final int GAP_EXTEND_COST = 20; + //private static final int GAP_EXTEND_COST = 5; private static final int GAP_INDEX = -1; @@ -68,6 +74,8 @@ public class AlignSeq float[][] score; + float alignmentScore; + float[][] E; float[][] F; @@ -98,6 +106,10 @@ public class AlignSeq public String astr2 = ""; + public String indelfreeAstr1 = ""; + + public String indelfreeAstr2 = ""; + /** DOCUMENT ME!! */ public int seq1start; @@ -113,6 +125,10 @@ public class AlignSeq public float maxscore; + public float meanScore; //needed for PaSiMap + + public int hypotheticMaxScore; // needed for PaSiMap + int prev = 0; StringBuffer output = new StringBuffer(); @@ -165,6 +181,16 @@ public class AlignSeq } /** + * returns the overall score of the alignment + * + * @return + */ + public float getAlignmentScore() + { + return alignmentScore; + } + + /** * DOCUMENT ME! * * @return DOCUMENT ME! @@ -385,8 +411,6 @@ public class AlignSeq int trace; maxscore = score[i][j] / 10f; - seq1end = maxi + 1; - seq2end = maxj + 1; aseq1 = new int[seq1.length + seq2.length]; aseq2 = new int[seq1.length + seq2.length]; @@ -396,6 +420,7 @@ public class AlignSeq count = (seq1.length + seq2.length) - 1; + while (i > 0 && j > 0) { aseq1[count] = seq1[i]; @@ -441,6 +466,146 @@ public class AlignSeq sb2.append(s2str.charAt(j)); } + + /* + * we built the character strings backwards, so now + * reverse them to convert to sequence strings + */ + astr1 = sb1.reverse().toString(); + astr2 = sb2.reverse().toString(); + } + + /** + * DOCUMENT ME! + */ + public void traceAlignmentWithEndGaps() + { + // Find the maximum score along the rhs or bottom row + float max = -Float.MAX_VALUE; + + for (int i = 0; i < seq1.length; i++) + { + if (score[i][seq2.length - 1] > max) + { + max = score[i][seq2.length - 1]; + maxi = i; + maxj = seq2.length - 1; + } + } + + for (int j = 0; j < seq2.length; j++) + { + if (score[seq1.length - 1][j] > max) + { + max = score[seq1.length - 1][j]; + maxi = seq1.length - 1; + maxj = j; + } + } + + int i = maxi; + int j = maxj; + int trace; + maxscore = score[i][j] / 10f; + + //&! get trailing gaps + while ((i < seq1.length - 1) || (j < seq2.length - 1)) + { + i++; + j++; + } + seq1end = i + 1; + seq2end = j + 1; + + + aseq1 = new int[seq1.length + seq2.length]; + aseq2 = new int[seq1.length + seq2.length]; + + StringBuilder sb1 = new StringBuilder(aseq1.length); + StringBuilder sb2 = new StringBuilder(aseq2.length); + + count = (seq1.length + seq2.length) - 1; + + //&! get trailing gaps + while ((i >= seq1.length) || (j >= seq2.length)) + { + if (i >= seq1.length) + { + aseq1[count] = GAP_INDEX; + sb1.append("-"); + aseq2[count] = seq2[j]; + sb2.append(s2str.charAt(j)); + } else if (j >= seq2.length) { + aseq1[count] = seq1[i]; + sb1.append(s1str.charAt(i)); + aseq2[count] = GAP_INDEX; + sb2.append("-"); + } + i--; + j--; + } + + + while (i > 0 && j > 0) + { + aseq1[count] = seq1[i]; + sb1.append(s1str.charAt(i)); + aseq2[count] = seq2[j]; + sb2.append(s2str.charAt(j)); + + trace = findTrace(i, j); + + if (trace == 0) + { + i--; + j--; + } + else if (trace == 1) + { + j--; + aseq1[count] = GAP_INDEX; + sb1.replace(sb1.length() - 1, sb1.length(), "-"); + } + else if (trace == -1) + { + i--; + aseq2[count] = GAP_INDEX; + sb2.replace(sb2.length() - 1, sb2.length(), "-"); + } + + count--; + } + + seq1start = i + 1; + seq2start = j + 1; + + if (aseq1[count] != GAP_INDEX) + { + aseq1[count] = seq1[i]; + sb1.append(s1str.charAt(i)); + } + + if (aseq2[count] != GAP_INDEX) + { + aseq2[count] = seq2[j]; + sb2.append(s2str.charAt(j)); + } + + //&! get initial gaps + while (j > 0 || i > 0) + { + if (j > 0) + { + sb1.append("-"); + sb2.append(s2str.charAt(j)); + j--; + } else if (i > 0) { + sb1.append(s1str.charAt(i)); + sb2.append("-"); + i--; + } + } + /* * we built the character strings backwards, so now * reverse them to convert to sequence strings @@ -890,7 +1055,8 @@ public class AlignSeq pdbpos++; } - if (allowmismatch || c1 == c2) + // ignore case differences + if (allowmismatch || (c1 == c2) || (Math.abs(c2-c1)==('a'-'A'))) { // extend mapping interval if (lp1 + 1 != alignpos || lp2 + 1 != pdbpos) @@ -1130,4 +1296,147 @@ public class AlignSeq } return redundancy; } + + /** + * calculate the mean score of the alignment + * mean score is equal to the score of an alignmenet of two sequences with randomly shuffled AA sequence composited of the same AA as the two original sequences + * + */ + public void meanScore() + { + //int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length(); + int length = indelfreeAstr1.length(); //both have the same length + //create HashMap for counting residues in each sequence + HashMap seq1ResCount = new HashMap(); + HashMap seq2ResCount = new HashMap(); + + // for both sequences (String indelfreeAstr1 or 2) create a key for the residue and add 1 each time its encountered + for (char residue: indelfreeAstr1.toCharArray()) + { + seq1ResCount.putIfAbsent(residue, 0); + seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1); + } + for (char residue: indelfreeAstr2.toCharArray()) + { + seq2ResCount.putIfAbsent(residue, 0); + seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1); + } + + // meanscore = for each residue pair get the number of appearance and add (countA * countB * pairwiseScore(AB)) + // divide the meanscore by the sequence length afterwards + float _meanscore = 0; + for (char resA : seq1ResCount.keySet()) + { + for (char resB : seq2ResCount.keySet()) + { + int countA = seq1ResCount.get(resA); + int countB = seq2ResCount.get(resB); + + float scoreAB = scoreMatrix.getPairwiseScore(resA, resB); + + _meanscore += countA * countB * scoreAB; + } + } + _meanscore /= length; + this.meanScore = _meanscore; + } + + public float getMeanScore() + { + return this.meanScore; + } + + /** + * calculate the hypothetic max score using the self-alignment of the sequences + */ + public void hypotheticMaxScore() + { + int _hmsA = 0; + int _hmsB = 0; + for (char residue: indelfreeAstr1.toCharArray()) + { + _hmsA += scoreMatrix.getPairwiseScore(residue, residue); + } + for (char residue: indelfreeAstr2.toCharArray()) + { + _hmsB += scoreMatrix.getPairwiseScore(residue, residue); + } + this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB; // take the lower self alignment + + } + + public int getHypotheticMaxScore() + { + return this.hypotheticMaxScore; + } + + /** + * create strings based of astr1 and astr2 but without gaps + */ + public void getIndelfreeAstr() + { + int n = astr1.length(); // both have the same length + for (int i = 0; i < n; i++) + { + if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i))) // if both sequences dont have a gap -> add to indelfreeAstr + { + this.indelfreeAstr1 += astr1.charAt(i); + this.indelfreeAstr2 += astr2.charAt(i); + } + } + } + + /** + * calculates the overall score of the alignment + * preprescore = sum of all scores - all penalties + * if preprescore < 1 ~ alignmentScore = Float.NaN > + * alignmentScore = ((preprescore - meanScore) / (hypotheticMaxScore - meanScore)) * coverage + */ + public void scoreAlignment() throws RuntimeException + { + + getIndelfreeAstr(); + meanScore(); + hypotheticMaxScore(); + // cannot calculate score because denominator would be zero + if (this.hypotheticMaxScore == this.meanScore) + { + throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore)); + } + //int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length(); + int n = indelfreeAstr1.length(); + + float score = 0; + boolean aGapOpen = false; + boolean bGapOpen = false; + for (int i = 0; i < n; i++) + { + char char1 = indelfreeAstr1.charAt(i); + char char2 = indelfreeAstr2.charAt(i); + boolean aIsLetter = Character.isLetter(char1); + boolean bIsLetter = Character.isLetter(char2); + if (aIsLetter && bIsLetter) // if pair -> get score + { + score += scoreMatrix.getPairwiseScore(char1, char2); + } else if (!aIsLetter && !bIsLetter) { // both are gap -> skip + } else if ((!aIsLetter && aGapOpen) || (!bIsLetter && bGapOpen)) { // one side gapopen -> score - gap_extend + score -= GAP_EXTEND_COST; + } else { // no gap open -> score - gap_open + score -= GAP_OPEN_COST; + } + // adjust GapOpen status in both sequences + aGapOpen = (!aIsLetter) ? true : false; + bGapOpen = (!bIsLetter) ? true : false; + } + + float preprescore = score; // if this score < 1 --> alignment score = Float.NaN + score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore); + int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max} + float coverage = (float) n / (float) _max[1]; // indelfreeAstr length / longest sequence length + float prescore = score; // only debug + score *= coverage; + + System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %d", preprescore, prescore, _max[1], score, this.meanScore, this.hypotheticMaxScore)); + this.alignmentScore = (preprescore < 1) ? Float.NaN : score; + } }