From 254e54378f7545b2fdba83794625ebda03cb52d2 Mon Sep 17 00:00:00 2001 From: MorellThomas Date: Fri, 4 Aug 2023 10:16:35 +0200 Subject: [PATCH] Fix alignment and score calculation for cc_analysis --- src/jalview/analysis/AlignSeq.java | 233 ++++++++++++++++++++-- src/jalview/analysis/Connectivity.java | 12 +- src/jalview/analysis/PaSiMap.java | 37 ++-- src/jalview/gui/PairwiseAlignPanel.java | 15 ++ src/jalview/math/Matrix.java | 134 +++++++++++++ src/jalview/math/MatrixI.java | 28 +++ src/jalview/math/MiscMath.java | 191 ++++++++++++++++++ src/jalview/math/SameLengthException.java | 35 ++++ src/jalview/math/ccAnalysis.java | 303 +++++++++++++++++++++++++++++ 9 files changed, 950 insertions(+), 38 deletions(-) create mode 100755 src/jalview/math/MiscMath.java create mode 100644 src/jalview/math/SameLengthException.java create mode 100755 src/jalview/math/ccAnalysis.java diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 10c7e6d..0731173 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -31,6 +31,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import jalview.math.MiscMath; import jalview.util.Comparison; import jalview.util.Format; import jalview.util.MapList; @@ -39,8 +40,10 @@ import jalview.util.MessageManager; import java.awt.Color; import java.awt.Graphics; import java.io.PrintStream; +import java.lang.IllegalArgumentException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.StringTokenizer; @@ -54,9 +57,12 @@ public class AlignSeq { private static final int MAX_NAME_LENGTH = 30; - private static final int GAP_OPEN_COST = 120; + //&! + //private static final int GAP_OPEN_COST = 120; + private static final int GAP_OPEN_COST = 10; - private static final int GAP_EXTEND_COST = 20; + //private static final int GAP_EXTEND_COST = 20; + private static final float GAP_EXTEND_COST = 0.5f; private static final int GAP_INDEX = -1; @@ -68,6 +74,8 @@ public class AlignSeq float[][] score; + float alignmentScore; + float[][] E; float[][] F; @@ -98,6 +106,10 @@ public class AlignSeq public String astr2 = ""; + public String indelfreeAstr1 = ""; + + public String indelfreeAstr2 = ""; + /** DOCUMENT ME!! */ public int seq1start; @@ -113,6 +125,10 @@ public class AlignSeq public float maxscore; + public float meanScore; //needed for PaSiMap + + public float hypotheticMaxScore; // needed for PaSiMap + int prev = 0; StringBuffer output = new StringBuffer(); @@ -165,6 +181,16 @@ public class AlignSeq } /** + * returns the overall score of the alignment + * + * @return + */ + public float getAlignmentScore() + { + return alignmentScore; + } + + /** * DOCUMENT ME! * * @return DOCUMENT ME! @@ -355,6 +381,7 @@ public class AlignSeq /** * DOCUMENT ME! */ + //&! not / 10 public void traceAlignment() { // Find the maximum score along the rhs or bottom row @@ -383,10 +410,18 @@ public class AlignSeq int i = maxi; int j = maxj; int trace; - maxscore = score[i][j] / 10f; + //maxscore = score[i][j] / 10f; + maxscore = score[i][j]; + + //&! get trailing gaps + while ((i < seq1.length - 1) || (j < seq2.length - 1)) + { + i++; + j++; + } + seq1end = i + 1; + seq2end = j + 1; - seq1end = maxi + 1; - seq2end = maxj + 1; aseq1 = new int[seq1.length + seq2.length]; aseq2 = new int[seq1.length + seq2.length]; @@ -396,6 +431,26 @@ public class AlignSeq count = (seq1.length + seq2.length) - 1; + //&! get trailing gaps + while ((i >= seq1.length) || (j >= seq2.length)) + { + if (i >= seq1.length) + { + aseq1[count] = GAP_INDEX; + sb1.append("-"); + aseq2[count] = seq2[j]; + sb2.append(s2str.charAt(j)); + } else if (j >= seq2.length) { + aseq1[count] = seq1[i]; + sb1.append(s1str.charAt(i)); + aseq2[count] = GAP_INDEX; + sb2.append("-"); + } + i--; + j--; + } + + while (i > 0 && j > 0) { aseq1[count] = seq1[i]; @@ -441,6 +496,21 @@ public class AlignSeq sb2.append(s2str.charAt(j)); } + //&! get initial gaps + while (j > 0 || i > 0) + { + if (j > 0) + { + sb1.append("-"); + sb2.append(s2str.charAt(j)); + j--; + } else if (i > 0) { + sb1.append(s1str.charAt(i)); + sb2.append("-"); + i--; + } + } + /* * we built the character strings backwards, so now * reverse them to convert to sequence strings @@ -588,12 +658,14 @@ public class AlignSeq * * @return DOCUMENT ME! */ + //&! not * 10 public int findTrace(int i, int j) { int t = 0; float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(j)); - float max = score[i - 1][j - 1] + (pairwiseScore * 10); + //float max = score[i - 1][j - 1] + (pairwiseScore * 10); + float max = score[i - 1][j - 1] + (pairwiseScore); if (F[i][j] > max) { @@ -631,6 +703,7 @@ public class AlignSeq /** * DOCUMENT ME! */ + //&! not * 10 public void calcScoreMatrix() { int n = seq1.length; @@ -638,7 +711,8 @@ public class AlignSeq // top left hand element score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0), - s2str.charAt(0)) * 10; + s2str.charAt(0)); + //s2str.charAt(0)) * 10; E[0][0] = -GAP_EXTEND_COST; F[0][0] = 0; @@ -652,7 +726,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0), s2str.charAt(j)); - score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST, + //score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST, + score[0][j] = max(pairwiseScore, -GAP_OPEN_COST, -GAP_EXTEND_COST); traceback[0][j] = 1; @@ -667,7 +742,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(0)); - score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]); + //score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]); + score[i][0] = max(pairwiseScore, E[i][0], F[i][0]); traceback[i][0] = -1; } @@ -683,7 +759,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(j)); - score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10), + //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10), + score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore), E[i][j], F[i][j]); traceback[i][j] = findTrace(i, j); } @@ -854,7 +931,6 @@ public class AlignSeq * @param type * AlignSeq.DNA or AlignSeq.PEP */ - //&! 1 v 1 needleman public static AlignSeq doGlobalNWAlignment(SequenceI s1, SequenceI s2, String type) { @@ -1131,4 +1207,139 @@ public class AlignSeq } return redundancy; } + + /** + * calculate the mean score of the alignment + * mean score is equal to the score of an alignmenet of two sequences with randomly shuffled AA sequence composited of the same AA as the two original sequences + * + */ + public void meanScore() + { + int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length(); + HashMap seq1ResCount = new HashMap(); + HashMap seq2ResCount = new HashMap(); + + for (char residue: s1str.toCharArray()) + { + seq1ResCount.putIfAbsent(residue, 0); + seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1); + } + for (char residue: s2str.toCharArray()) + { + seq2ResCount.putIfAbsent(residue, 0); + seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1); + } + + float _meanscore = 0; + for (char resA : seq1ResCount.keySet()) + { + for (char resB : seq2ResCount.keySet()) + { + int countA = seq1ResCount.get(resA); + int countB = seq2ResCount.get(resB); + + float scoreAB = scoreMatrix.getPairwiseScore(resA, resB); + + _meanscore += countA * countB * scoreAB; + } + } + _meanscore /= length; + this.meanScore = _meanscore; + } + + public float getMeanScore() + { + return this.meanScore; + } + + /** + * calculate the hypothetic max score using the self-alignment of the sequences + */ + public void hypotheticMaxScore() + { + float _hmsA = 0f; + float _hmsB = 0f; + for (char residue: s1str.toCharArray()) + { + _hmsA += scoreMatrix.getPairwiseScore(residue, residue); + } + for (char residue: s2str.toCharArray()) + { + _hmsB += scoreMatrix.getPairwiseScore(residue, residue); + } + this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB; + } + + public float getHypotheticMaxScore() + { + return this.hypotheticMaxScore; + } + + /** + * create strings based of astr1 and astr2 but without gaps + */ + public void getIndelfreeAstr() + { + int n = astr1.length(); // both have the same length + for (int i = 0; i < n; i++) + { + if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i))) + { + this.indelfreeAstr1 += astr1.charAt(i); + this.indelfreeAstr2 += astr2.charAt(i); + } + } + } + + /** + * calculates the overall score of the alignment + * alignmentScore = sum of all scores - all penalties + * + */ + public void scoreAlignment() throws RuntimeException + { + getIndelfreeAstr(); + meanScore(); + hypotheticMaxScore(); + // cannot calculate score because denominator would be zero + if (this.hypotheticMaxScore == this.meanScore) + { + throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore)); + } + int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length(); + + System.out.println(String.format("astr1: %c .. %c ; astr2: %c .. %c", astr1.toCharArray()[0], astr1.toCharArray()[astr1.length() - 1], astr2.toCharArray()[0], astr2.toCharArray()[astr2.length() - 1])); + + float score = 0; + boolean aGapOpen = false; + boolean bGapOpen = false; + for (int i = 0; i < n; i++) + { + char char1 = astr1.charAt(i); + char char2 = astr2.charAt(i); + boolean aIsLetter = Character.isLetter(char1); + boolean bIsLetter = Character.isLetter(char2); + if (aIsLetter && bIsLetter) // if pair -> get score + { + score += scoreMatrix.getPairwiseScore(char1, char2); + } else if (!aIsLetter && !bIsLetter) { // both are gap -> skip + } else if ((!aIsLetter && aGapOpen) || (!bIsLetter && bGapOpen)) { // one side gapopen -> score - gap_extend + score -= GAP_EXTEND_COST; + } else { // no gap open -> score - gap_open + score -= GAP_OPEN_COST; + } + aGapOpen = (!aIsLetter) ? true : false; + bGapOpen = (!bIsLetter) ? true : false; + } + + float preprescore = score; + score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore); + int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max} + float coverage = (float) indelfreeAstr1.length() / (float) _max[1]; + float prescore = score; + score *= coverage; + + System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %f", preprescore, prescore, indelfreeAstr1.length(), score, this.meanScore, this.hypotheticMaxScore)); + this.alignmentScore = (preprescore < 1) ? Float.NaN : score; + } } diff --git a/src/jalview/analysis/Connectivity.java b/src/jalview/analysis/Connectivity.java index 25f9626..8d56b43 100644 --- a/src/jalview/analysis/Connectivity.java +++ b/src/jalview/analysis/Connectivity.java @@ -57,27 +57,23 @@ public class Connectivity connectivity.putIfAbsent(sequences[j], 0); int iOld = connectivity.get(sequences[i]); int jOld = connectivity.get(sequences[j]); - // count the connection if its score is bigger than 0.1 - if (scores[i][j] >= 0.1) // what is cutoff instead of just 0.1? + // count the connection if its score is not NaN + if (!Float.isNaN(scores[i][j])) { connectivity.put(sequences[i], ++iOld); connectivity.put(sequences[j], ++jOld); } } } - //Debug - for (int i = 0; i < sequences.length; i++) - { - System.out.println(connectivity.get(sequences[i])); - } // if a sequence has too few connections, abort connectivity.forEach((sequence, connection) -> { + System.out.println(String.format("%s: %d", sequence.getName(), connection)); if (connection < dim) { // a popup saying that it failed would be nice - throw new ConnectivityException(sequence.getName(), connection, dim); + //throw new ConnectivityException(sequence.getName(), connection, dim); } } ); diff --git a/src/jalview/analysis/PaSiMap.java b/src/jalview/analysis/PaSiMap.java index d0362b6..e2a9882 100755 --- a/src/jalview/analysis/PaSiMap.java +++ b/src/jalview/analysis/PaSiMap.java @@ -28,6 +28,8 @@ import jalview.datamodel.Point; import jalview.datamodel.SequenceI; import jalview.gui.PairwiseAlignPanel; import jalview.gui.PaSiMapPanel; +import jalview.math.ccAnalysis; +import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.viewmodel.AlignmentViewport; @@ -253,31 +255,28 @@ public class PaSiMap implements Runnable // run needleman regardless if aligned or not // gui.PairwiseAlignPanel <++> PairwiseAlignPanel alignment = new PairwiseAlignPanel(seqs); - float[][] scores = alignment.getScores(); - System.out.println(scores[8][18]); // do scores have to be divided by totscore?? + float[][] scores = alignment.getAlignmentScores(); //bigger index first -- eg scores[14][13] + /** TODO scores still not same as in original pasimap */ + System.out.println(scores[8][0]); - // what is connectivity and why? Hashtable connectivity = seqs.calculateConnectivity(scores, dim); + pairwiseScores = new Matrix(scores); + System.out.println(pairwiseScores.getValue(8,0)); + pairwiseScores.fillDiagonal(); + System.out.println(pairwiseScores.getValue(8,0)); + /** TODO implement cc_analysis */ + pairwiseScores = ccAnalysis.run(pairwiseScores); - /** pca code */ - /* - * sequence pairwise similarity scores - */ - //&! analysis/scoremodels/ScoreMatrix computeSimilarity - //pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); - /* - * tridiagonal matrix - */ - //tridiagonal = pairwiseScores.copy(); - //tridiagonal.tred(); + /** code like in pca, needed for plot */ + tridiagonal = pairwiseScores.copy(); + tridiagonal.tred(); + eigenMatrix = tridiagonal.copy(); + eigenMatrix.tqli(); + System.out.println(getDetails()); + - /* - * the diagonalization matrix - */ - //eigenMatrix = tridiagonal.copy(); - //eigenMatrix.tqli(); } catch (Exception q) { Console.error("Error computing PaSiMap: " + q.getMessage()); diff --git a/src/jalview/gui/PairwiseAlignPanel.java b/src/jalview/gui/PairwiseAlignPanel.java index c981b4c..1cca215 100755 --- a/src/jalview/gui/PairwiseAlignPanel.java +++ b/src/jalview/gui/PairwiseAlignPanel.java @@ -45,6 +45,8 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel private float[][] scores; + private float[][] alignmentScores; // scores used by PaSiMap + AlignmentViewport av; Vector sequences; @@ -85,12 +87,15 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel : AlignSeq.PEP; float[][] scores = new float[seqs.length][seqs.length]; + float[][] alignmentScores = new float[seqs.length][seqs.length]; double totscore = 0D; int count = seqs.length; boolean first = true; for (int i = 1; i < count; i++) { + // fill diagonal alignmentScores with Float.NaN + alignmentScores[i-1][i-1] = Float.NaN; for (int j = 0; j < i; j++) { AlignSeq as = new AlignSeq(seqs[i], seqStrings[i], seqs[j], @@ -103,6 +108,7 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel as.calcScoreMatrix(); as.traceAlignment(); + as.scoreAlignment(); if (!first) { @@ -112,6 +118,8 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel first = false; as.printAlignment(System.out); scores[i][j] = as.getMaxScore() / as.getASeq1().length; + alignmentScores[i][j] = as.getAlignmentScore(); + //&! totscore = totscore + scores[i][j]; textarea.append(as.getOutput()); @@ -119,8 +127,10 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel sequences.add(as.getAlignedSeq2()); } } + alignmentScores[count-1][count-1] = Float.NaN; this.scores = scores; + this.alignmentScores = alignmentScores; if (count > 2) { @@ -133,6 +143,11 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel return this.scores; } + public float[][] getAlignmentScores() + { + return this.alignmentScores; + } + /** * Prints a matrix of seqi-seqj pairwise alignment scores to sysout * diff --git a/src/jalview/math/Matrix.java b/src/jalview/math/Matrix.java index b22bf4e..5eaa7c3 100755 --- a/src/jalview/math/Matrix.java +++ b/src/jalview/math/Matrix.java @@ -24,6 +24,7 @@ import jalview.util.Format; import jalview.util.MessageManager; import java.io.PrintStream; +import java.lang.Math; import java.util.Arrays; /** @@ -107,6 +108,32 @@ public class Matrix implements MatrixI } } + public Matrix(float[][] values) + { + this.rows = values.length; + this.cols = this.rows == 0 ? 0 : values[0].length; + + /* + * make a copy of the values array, for immutability + */ + this.value = new double[rows][]; + int i = 0; + for (float[] row : values) + { + if (row != null) + { + value[i] = new double[row.length]; + int j = 0; + for (float oldValue : row) + { + value[i][j] = oldValue; + j++; + } + } + i++; + } + } + @Override public MatrixI transpose() { @@ -1022,4 +1049,111 @@ public class Matrix implements MatrixI } return true; } + + /** + * Returns a copy in which every value in the matrix is its absolute + * + */ + @Override + public MatrixI absolute() + { + MatrixI copy = this.copy(); + for (int i = 0; i < copy.width(); i++) + { + double[] row = copy.getRow(i); + if (row != null) + { + for (int j = 0; j < row.length; j++) + { + row[j] = Math.abs(row[j]); + } + } + } + return copy; + } + + /** + * Returns the mean of each row + * + */ + @Override + public double[] meanRow() + { + double[] mean = new double[rows]; + int i = 0; + for (double[] row : value) + { + if (row != null) + { + mean[i++] = MiscMath.mean(row); + } + } + return mean; + } + + /** + * Returns the mean of each column + * + */ + @Override + public double[] meanCol() + { + double[] mean = new double[cols]; + for (int j = 0; j < cols; j++) + { + double[] column = getColumn(j); + if (column != null) + { + mean[j] = MiscMath.mean(column); + } + } + return mean; + } + + /** + * fills up a diagonal matrix with its transposed copy + * !other side should be filled with 0 + * + * TODO check on which side it was diagonal and only do calculations for the other side + */ + @Override + public void fillDiagonal() + { + int n = this.rows; + int m = this.cols; + MatrixI copy = this.transpose(); + for (int i = 0; i < n; i++) + { + for (int j = 0; j < m; j++) + { + if (i != j) + { + this.addValue(i, j, copy.getValue(i,j)); + } + } + } + } + + /** + * counts the number of Double.NaN in the matrix + * + * @return + */ + @Override + public int countNaN() + { + int NaN = 0; + for (int i = 0; i < this.rows; i++) + { + for (int j = 0; j < this.cols; j++) + { + if (Double.isNaN(this.getValue(i,j))) + { + NaN++; + } + } + } + return NaN; + } + } diff --git a/src/jalview/math/MatrixI.java b/src/jalview/math/MatrixI.java index e98007e..61d73dc 100644 --- a/src/jalview/math/MatrixI.java +++ b/src/jalview/math/MatrixI.java @@ -170,4 +170,32 @@ public interface MatrixI * @return */ boolean equals(MatrixI m2, double delta); + + /** + * Returns a copy in which every value in the matrix is its absolute + */ + MatrixI absolute(); + + /** + * Returns the mean of each row + */ + double[] meanRow(); + + /** + * Returns the mean of each column + */ + double[] meanCol(); + + /** + * fills up a diagonal matrix with its transposed copy + * !other side should be filled with either 0 or Double.NaN + */ + void fillDiagonal(); + + /** + * counts the number of Double.NaN in the matrix + * + * @return + */ + int countNaN(); } diff --git a/src/jalview/math/MiscMath.java b/src/jalview/math/MiscMath.java new file mode 100755 index 0000000..4fde68a --- /dev/null +++ b/src/jalview/math/MiscMath.java @@ -0,0 +1,191 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.math; + +import java.lang.Math; +import java.util.Arrays; + +/** + * A collection of miscellaneous mathematical operations + * @AUTHOR MorellThomas + */ +public class MiscMath +{ + /** + * calculates the mean of an array + * + * @param m ~ array + * @return + * TODO whats with nans?? + */ + public static double mean(double[] m) + { + double sum = 0; + for (int i = 0; i < m.length; i++) + { + if (!Double.isNaN(m[i])) + { + sum += m[i]; + } + } + return sum / m.length; + } + + /** + * calculates the square root of each element in an array + * + * @param m ~ array + * + * @return + * TODO + * make general with function passed -> apply function to each element + */ + public static double[] sqrt(double[] m) + { + double[] sqrts = new double[m.length]; + for (int i = 0; i < m.length; i++) + { + sqrts[i] = Math.sqrt(m[i]); + } + return sqrts; + } + + /** + * calculate element wise multiplication of two arrays + * + * @param a ~ array + * @param b ~ array + * + * @return + */ + public static double[] elementwiseMultiply(byte[] a, double[] b) throws RuntimeException + { + if (a.length != b.length) + { + throw new SameLengthException(a.length, b.length); + } + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = a[i] * b[i]; + } + return result; + } + public static double[] elementwiseMultiply(double[] a, double[] b) throws RuntimeException + { + if (a.length != b.length) + { + throw new SameLengthException(a.length, b.length); + } + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = a[i] * b[i]; + } + return result; + } + public static byte[] elementwiseMultiply(byte[] a, byte[] b) throws RuntimeException + { + if (a.length != b.length) + { + throw new SameLengthException(a.length, b.length); + } + byte[] result = new byte[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = (byte) (a[i] * b[i]); + } + return result; + } + + /** + * calculate element wise division of two arrays + * + * @param a ~ array + * @param b ~ array + * + * @return + */ + public static double[] elementwiseDivide(double[] a, double[] b) throws RuntimeException + { + if (a.length != b.length) + { + throw new SameLengthException(a.length, b.length); + } + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = a[i] / b[i]; + } + return result; + } + + /** + * returns true if two arrays are element wise within a tolerance + * + * @param a ~ array + * @param b ~ array + * @param rtol ~ relative tolerance + * @param atol ~ absolute tolerance + * @param equalNAN ~ whether NaN at the same position return true + * + * @return + */ + public static boolean allClose(double[] a, double[] b, double rtol, double atol, boolean equalNAN) + { + boolean areEqual = true; + for (int i = 0; i < a.length; i++) + { + if (equalNAN && (Double.isNaN(a[i]) && Double.isNaN(b[i]))) + { + continue; + } + if (Math.abs(a[i] - b[i]) > (atol + rtol * Math.abs(b[i]))) + { + areEqual = false; + break; + } + } + return areEqual; + } + + /** + * returns the index of the maximum and the maximum value of an array + * + * @pararm a ~ array + * + * @returns + */ + public static int[] findMax(int[] a) + { + int max = 0; + int maxIndex = 0; + for (int i = 0; i < a.length; i++) + { + if (a[i] > max) + { + max = a[i]; + maxIndex = i; + } + } + return new int[]{maxIndex, max}; + } +} diff --git a/src/jalview/math/SameLengthException.java b/src/jalview/math/SameLengthException.java new file mode 100644 index 0000000..9621827 --- /dev/null +++ b/src/jalview/math/SameLengthException.java @@ -0,0 +1,35 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.math; + +public class SameLengthException extends RuntimeException +{ + public SameLengthException(int lengthA, int lengthB) + { + this("Your arrays do not have the same length!", lengthA, lengthB); + } + + public SameLengthException(String message, int lengthA, int lengthB) + { + super(String.format("%s (%d and %d)", message, lengthA, lengthB)); + } + +} diff --git a/src/jalview/math/ccAnalysis.java b/src/jalview/math/ccAnalysis.java new file mode 100755 index 0000000..22ba162 --- /dev/null +++ b/src/jalview/math/ccAnalysis.java @@ -0,0 +1,303 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ + +/* +* Copyright 2018-2022 Kathy Su, Kay Diederichs +* +* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along with this program. If not, see . +*/ + +/** +* Ported from https://doi.org/10.1107/S2059798317000699 by +* @AUTHOR MorellThomas +*/ + +package jalview.math; + +import jalview.bin.Console; + +import java.lang.Math; +import java.lang.System; +import java.util.Arrays; +import java.util.ArrayList; + +/** + * A class to model rectangular matrices of double values and operations on them + */ +public class ccAnalysis +{ + + /** TODO + * DOCUMENT ME + * + * @param hSigns ~ <++> + * @param scores ~ input score matrix + * + * @return distrustScores + */ + private static int[] initialiseDistrusts(byte[] hSigns, MatrixI scores) + { + int[] distrustScores = new int[scores.width()]; + + // loop over symmetric matrix + for (int i = 0; i < scores.width(); i++) + { + byte hASign = hSigns[i]; + int conHypNum = 0; + int proHypNum = 0; + + for (int j = 0; j < scores.width(); j++) + { + double cell = scores.getRow(i)[j]; // value at [i][j] in scores + byte hBSign = hSigns[j]; + if (!Double.isNaN(cell)) + { + byte cellSign = (byte) Math.signum(cell); + if (cellSign == hASign * hBSign) + { + proHypNum++; + } else { + conHypNum++; + } + } + } + distrustScores[i] = conHypNum - proHypNum; + } + return distrustScores; + } + + /** TODO + * DOCUMENT ME + * + * @param hSigns ~ <++> + * @param distrustScores ~ <++> + * @param scores ~ input score matrix + * + * @return hSigns + */ + private static byte[] optimiseHypothesis(byte[] hSigns, int[] distrustScores, MatrixI scores) + { + // get maximum distrust score + int[] maxes = MiscMath.findMax(distrustScores); + int maxDistrustIndex = maxes[0]; + int maxDistrust = maxes[1]; + + // if hypothesis is not optimal yet + if (maxDistrust > 0) + { + //toggle sign for hI with maximum distrust + hSigns[maxDistrustIndex] *= -1; + // update distrust at same position + distrustScores[maxDistrustIndex] *= -1; + + // also update distrust scores for all hI that were not changed + byte hASign = hSigns[maxDistrustIndex]; + for (int NOTmaxDistrustIndex = 0; NOTmaxDistrustIndex < distrustScores.length; NOTmaxDistrustIndex++) + { + if (NOTmaxDistrustIndex != maxDistrustIndex) + { + byte hBSign = hSigns[NOTmaxDistrustIndex]; + double cell = scores.getRow(maxDistrustIndex)[NOTmaxDistrustIndex]; + + // distrust only changed if not NaN + if (!Double.isNaN(cell)) + { + byte cellSign = (byte) Math.signum(cell); + // if sign of cell matches hypothesis decrease distrust by 2 because 1 more value supporting and 1 less contradicting + // else increase by 2 + if (cellSign == hASign * hBSign) + { + distrustScores[NOTmaxDistrustIndex] -= 2; + } else { + distrustScores[NOTmaxDistrustIndex] += 2; + } + } + } + } + //further optimisation necessary + return optimiseHypothesis(hSigns, distrustScores, scores); + + } else { + return hSigns; + } + } + + /** TODO + * DOCUMENT ME + * + * runs analysis + * + * @param scores ~ score matrix + * + * @return + */ + public static MatrixI run (MatrixI scores) + { + MatrixI eigenMatrix = scores.copy(); + try + { + /* + * Calculate correction factor for 2nd and higher eigenvalue(s). + * This correction is NOT needed for the 1st eigenvalue, because the + * unknown (=NaN) values of the matrix are approximated by presuming + * 1-dimensional vectors as the basis of the matrix interpretation as dot + * products. + */ + MatrixI scoresOld = scores.copy(); + //&! debug + System.out.println("input:"); + eigenMatrix.print(System.out, "%8.2f"); + int matrixWidth = eigenMatrix.width(); // square matrix, so width == height + int matrixElementsTotal = matrixWidth * 2; //total number of elemts + + int correlationFactor = (matrixElementsTotal - eigenMatrix.countNaN()) / matrixElementsTotal; + + /* + * Calculate hypothetical value (1-dimensional vector) h_i for each + * dataset by interpreting the given correlation coefficients as scalar + * products. + */ + + /* + * Memory for current hypothesis concerning sign of each h_i. + * List of signs for all h_i in the encoding: + * * 1: positive + * * 0: zero + * * -1: negative + * Initial hypothesis: all signs are positive. + */ + byte[] hSigns = new byte[matrixWidth]; + Arrays.fill(hSigns, (byte) 1); + + //Estimate signs for each h_i by refining hypothesis on signs. + hSigns = optimiseHypothesis(hSigns, initialiseDistrusts(hSigns, eigenMatrix), eigenMatrix); + + //Estimate absolute values for each h_i by determining sqrt of mean of + //non-NaN absolute values for every row. + //<++> hAbs = //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) + double[] hAbs = MiscMath.sqrt(eigenMatrix.absolute().meanRow()); //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) + + //Combine estimated signs with absolute values in obtain total value for + //each h_i. + double[] hValues = MiscMath.elementwiseMultiply(hSigns, hAbs); + //<++>hValues.reshape((1,matrixWidth)); // doesnt it already look like this + + /*Complement symmetric matrix by using the scalar products of estimated + *values of h_i to replace NaN-cells. + *Matrix positions that have estimated values + *(only for diagonal and upper off-diagonal values, due to the symmetry + *the positions of the lower-diagonal values can be inferred). + List of tuples (row_idx, column_idx).*/ + + ArrayList estimatedPositions = new ArrayList(); + + // for off-diagonal cells + for (int rowIndex = 0; rowIndex < matrixWidth - 1; rowIndex++) + { + for (int columnIndex = rowIndex + 1; columnIndex < matrixWidth; columnIndex++) + { + double cell = eigenMatrix.getValue(rowIndex, columnIndex); + if (Double.isNaN(cell)) + { + //calculate scalar product as new cell value + cell = hValues[rowIndex] * hValues[columnIndex]; // something is wrong with hAbs andhValues and everything!!!!!!!!!!!! + //fill in new value in cell and symmetric partner + eigenMatrix.setValue(rowIndex, columnIndex, cell); + eigenMatrix.setValue(columnIndex, rowIndex, cell); + //save positions of estimated values + estimatedPositions.add(new int[]{rowIndex, columnIndex}); + } + } + } + + // for diagonal cells + for (int diagonalIndex = 0; diagonalIndex < matrixWidth; diagonalIndex++) + { + double cell = Math.pow(hValues[diagonalIndex], 2); + eigenMatrix.setValue(diagonalIndex, diagonalIndex, cell); + estimatedPositions.add(new int[]{diagonalIndex, diagonalIndex}); + } + + /*Refine total values of each h_i: + *Initialise h_values of the hypothetical non-existant previous iteration + *with the correct format but with impossible values. + Needed for exit condition of otherwise endless loop.*/ + + double[] hValuesOld = new double[matrixWidth]; + + int iterationCount = 0; + + // repeat unitl values of h do not significantly change anymore + while (true) + { + for (int hIndex = 0; hIndex < matrixWidth; hIndex++) + { + //@python newH = np.sum(hValues * eigenMatrix[hIndex]) / np.sum(hValues ** 2) + double newH = Arrays.stream(MiscMath.elementwiseMultiply(hValues, eigenMatrix.getRow(hIndex))).sum() / Arrays.stream(MiscMath.elementwiseMultiply(hValues, hValues)).sum(); + hValues[hIndex] = newH; + + } + //update values of estimated positions + for (int[] pair : estimatedPositions) // pair ~ row, col + { + double newVal = hValues[pair[0]] * hValues[pair[1]]; + eigenMatrix.setValue(pair[0], pair[1], newVal); + eigenMatrix.setValue(pair[1], pair[0], newVal); + } + + iterationCount++; + + //exit loop as soon as new values are similar to the last iteration + if (MiscMath.allClose(hValues, hValuesOld, 0d, 1e-05d, false)) + { + break; + } + + //save hValues for comparison in the next iteration + System.arraycopy(hValues, 0, hValuesOld, 0, hValues.length); + } + + //----------------------------- + //Use complemented symmetric matrix to calculate final representative + //vectors. + //&! debug + System.out.println("after estimating:"); + eigenMatrix.print(System.out, "%8.2f"); + + //Eigendecomposition. + //eigenMatrix.tqli(); + + //&! debug + //eigenMatrix.print(System.out, "%8.2f"); + + + } catch (Exception q) + { + Console.error("Error computing cc_analysis: " + q.getMessage()); + q.printStackTrace(); + } + return eigenMatrix; + } +} -- 1.7.10.2