From: MorellThomas Date: Thu, 7 Sep 2023 07:35:18 +0000 (+0200) Subject: Implemented least-squares optimisation in ccAnalysis X-Git-Tag: Release_2_11_4_0~31^2~38^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=4ae5956e606a045ea9da778f4829cb6d2a157180;p=jalview.git Implemented least-squares optimisation in ccAnalysis --- diff --git a/j11lib/commons-math3-3.6.1.jar b/j11lib/commons-math3-3.6.1.jar new file mode 100644 index 0000000..0ff582c Binary files /dev/null and b/j11lib/commons-math3-3.6.1.jar differ diff --git a/j8lib/commons-math3-3.6.1.jar b/j8lib/commons-math3-3.6.1.jar new file mode 100644 index 0000000..0ff582c Binary files /dev/null and b/j8lib/commons-math3-3.6.1.jar differ diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 0731173..420aa78 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -58,11 +58,11 @@ public class AlignSeq private static final int MAX_NAME_LENGTH = 30; //&! - //private static final int GAP_OPEN_COST = 120; - private static final int GAP_OPEN_COST = 10; + private static final int GAP_OPEN_COST = 120; + //private static final int GAP_OPEN_COST = 100; - //private static final int GAP_EXTEND_COST = 20; - private static final float GAP_EXTEND_COST = 0.5f; + private static final int GAP_EXTEND_COST = 20; + //private static final int GAP_EXTEND_COST = 5; private static final int GAP_INDEX = -1; @@ -127,7 +127,7 @@ public class AlignSeq public float meanScore; //needed for PaSiMap - public float hypotheticMaxScore; // needed for PaSiMap + public int hypotheticMaxScore; // needed for PaSiMap int prev = 0; @@ -410,8 +410,8 @@ public class AlignSeq int i = maxi; int j = maxj; int trace; - //maxscore = score[i][j] / 10f; - maxscore = score[i][j]; + maxscore = score[i][j] / 10f; + //maxscore = score[i][j]; //&! get trailing gaps while ((i < seq1.length - 1) || (j < seq2.length - 1)) @@ -664,8 +664,8 @@ public class AlignSeq int t = 0; float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(j)); - //float max = score[i - 1][j - 1] + (pairwiseScore * 10); - float max = score[i - 1][j - 1] + (pairwiseScore); + float max = score[i - 1][j - 1] + (pairwiseScore * 10); + //float max = score[i - 1][j - 1] + (pairwiseScore); if (F[i][j] > max) { @@ -711,8 +711,8 @@ public class AlignSeq // top left hand element score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0), - s2str.charAt(0)); - //s2str.charAt(0)) * 10; + //s2str.charAt(0)); + s2str.charAt(0)) * 10; E[0][0] = -GAP_EXTEND_COST; F[0][0] = 0; @@ -726,8 +726,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0), s2str.charAt(j)); - //score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST, - score[0][j] = max(pairwiseScore, -GAP_OPEN_COST, + score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST, + //score[0][j] = max(pairwiseScore, -GAP_OPEN_COST, -GAP_EXTEND_COST); traceback[0][j] = 1; @@ -742,8 +742,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(0)); - //score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]); - score[i][0] = max(pairwiseScore, E[i][0], F[i][0]); + score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]); + //score[i][0] = max(pairwiseScore, E[i][0], F[i][0]); traceback[i][0] = -1; } @@ -759,8 +759,8 @@ public class AlignSeq float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), s2str.charAt(j)); - //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10), - score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore), + score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10), + //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore), E[i][j], F[i][j]); traceback[i][j] = findTrace(i, j); } @@ -1215,21 +1215,26 @@ public class AlignSeq */ public void meanScore() { - int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length(); + //int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length(); + int length = indelfreeAstr1.length(); //both have the same length + //create HashMap for counting residues in each sequence HashMap seq1ResCount = new HashMap(); HashMap seq2ResCount = new HashMap(); - for (char residue: s1str.toCharArray()) + // for both sequences (String indelfreeAstr1 or 2) create a key for the residue and add 1 each time its encountered + for (char residue: indelfreeAstr1.toCharArray()) { seq1ResCount.putIfAbsent(residue, 0); seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1); } - for (char residue: s2str.toCharArray()) + for (char residue: indelfreeAstr2.toCharArray()) { seq2ResCount.putIfAbsent(residue, 0); seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1); } + // meanscore = for each residue pair get the number of appearance and add (countA * countB * pairwiseScore(AB)) + // divide the meanscore by the sequence length afterwards float _meanscore = 0; for (char resA : seq1ResCount.keySet()) { @@ -1257,20 +1262,21 @@ public class AlignSeq */ public void hypotheticMaxScore() { - float _hmsA = 0f; - float _hmsB = 0f; - for (char residue: s1str.toCharArray()) + int _hmsA = 0; + int _hmsB = 0; + for (char residue: indelfreeAstr1.toCharArray()) { _hmsA += scoreMatrix.getPairwiseScore(residue, residue); } - for (char residue: s2str.toCharArray()) + for (char residue: indelfreeAstr2.toCharArray()) { _hmsB += scoreMatrix.getPairwiseScore(residue, residue); } - this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB; + this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB; // take the lower self alignment + } - public float getHypotheticMaxScore() + public int getHypotheticMaxScore() { return this.hypotheticMaxScore; } @@ -1283,7 +1289,7 @@ public class AlignSeq int n = astr1.length(); // both have the same length for (int i = 0; i < n; i++) { - if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i))) + if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i))) // if both sequences dont have a gap -> add to indelfreeAstr { this.indelfreeAstr1 += astr1.charAt(i); this.indelfreeAstr2 += astr2.charAt(i); @@ -1293,11 +1299,13 @@ public class AlignSeq /** * calculates the overall score of the alignment - * alignmentScore = sum of all scores - all penalties - * + * preprescore = sum of all scores - all penalties + * if preprescore < 1 ~ alignmentScore = Float.NaN > + * alignmentScore = ((preprescore - meanScore) / (hypotheticMaxScore - meanScore)) * coverage */ public void scoreAlignment() throws RuntimeException { + getIndelfreeAstr(); meanScore(); hypotheticMaxScore(); @@ -1306,17 +1314,16 @@ public class AlignSeq { throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore)); } - int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length(); + //int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length(); + int n = indelfreeAstr1.length(); - System.out.println(String.format("astr1: %c .. %c ; astr2: %c .. %c", astr1.toCharArray()[0], astr1.toCharArray()[astr1.length() - 1], astr2.toCharArray()[0], astr2.toCharArray()[astr2.length() - 1])); - float score = 0; boolean aGapOpen = false; boolean bGapOpen = false; for (int i = 0; i < n; i++) { - char char1 = astr1.charAt(i); - char char2 = astr2.charAt(i); + char char1 = indelfreeAstr1.charAt(i); + char char2 = indelfreeAstr2.charAt(i); boolean aIsLetter = Character.isLetter(char1); boolean bIsLetter = Character.isLetter(char2); if (aIsLetter && bIsLetter) // if pair -> get score @@ -1328,18 +1335,19 @@ public class AlignSeq } else { // no gap open -> score - gap_open score -= GAP_OPEN_COST; } + // adjust GapOpen status in both sequences aGapOpen = (!aIsLetter) ? true : false; bGapOpen = (!bIsLetter) ? true : false; } - float preprescore = score; + float preprescore = score; // if this score < 1 --> alignment score = Float.NaN score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore); int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max} - float coverage = (float) indelfreeAstr1.length() / (float) _max[1]; - float prescore = score; + float coverage = (float) n / (float) _max[1]; // indelfreeAstr length / longest sequence length + float prescore = score; // only debug score *= coverage; - System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %f", preprescore, prescore, indelfreeAstr1.length(), score, this.meanScore, this.hypotheticMaxScore)); + System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %d", preprescore, prescore, _max[1], score, this.meanScore, this.hypotheticMaxScore)); this.alignmentScore = (preprescore < 1) ? Float.NaN : score; } } diff --git a/src/jalview/analysis/Connectivity.java b/src/jalview/analysis/Connectivity.java index 8d56b43..0f849e3 100644 --- a/src/jalview/analysis/Connectivity.java +++ b/src/jalview/analysis/Connectivity.java @@ -73,7 +73,7 @@ public class Connectivity if (connection < dim) { // a popup saying that it failed would be nice - //throw new ConnectivityException(sequence.getName(), connection, dim); + throw new ConnectivityException(sequence.getName(), connection, dim); } } ); diff --git a/src/jalview/analysis/PaSiMap.java b/src/jalview/analysis/PaSiMap.java index e2a9882..e9236cb 100755 --- a/src/jalview/analysis/PaSiMap.java +++ b/src/jalview/analysis/PaSiMap.java @@ -28,7 +28,6 @@ import jalview.datamodel.Point; import jalview.datamodel.SequenceI; import jalview.gui.PairwiseAlignPanel; import jalview.gui.PaSiMapPanel; -import jalview.math.ccAnalysis; import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.viewmodel.AlignmentViewport; @@ -113,13 +112,19 @@ public class PaSiMap implements Runnable { Point[] out = new Point[getHeight()]; - for (int i = 0; i < getHeight(); i++) + for (int i = 0; i < out.length; i++) { float x = (float) component(i, l) * factor; float y = (float) component(i, n) * factor; float z = (float) component(i, mm) * factor; out[i] = new Point(x, y, z); } + //&! + System.out.println("Points:"); + for (Point point : out) + { + System.out.println(point.toString()); + } return out; } @@ -157,14 +162,20 @@ public class PaSiMap implements Runnable */ double component(int row, int n) { + /* double out = 0.0; for (int i = 0; i < pairwiseScores.width(); i++) { + double pairwiseScore = pairwiseScores.getValue(row, i); + double eigenScore = eigenMatrix.getValue(i, n); out += (pairwiseScores.getValue(row, i) * eigenMatrix.getValue(i, n)); } return out / eigenMatrix.getD()[n]; + */ + System.out.println(String.format("row %d, col %d", row, n)); + return eigenMatrix.getValue(row, n); } /** @@ -189,6 +200,7 @@ public class PaSiMap implements Runnable /* * tridiagonal matrix, with D and E vectors */ + /* sb.append(" ---Tridiag transform matrix ---\n"); sb.append(" --- D vector ---\n"); tridiagonal.printD(ps, "%15.4e"); @@ -196,6 +208,7 @@ public class PaSiMap implements Runnable sb.append("--- E vector ---\n"); tridiagonal.printE(ps, "%15.4e"); ps.println(); + */ /* * eigenvalues matrix, with D vector @@ -211,69 +224,40 @@ public class PaSiMap implements Runnable /** * Performs the PaSiMap calculation + * + * creates a new gui/PairwiseAlignPanel with the input sequences (<++>/AlignmentViewport) + * uses analysis/AlignSeq to creatue the pairwise alignments and calculate the AlignmentScores (float for each pair) + * gets all float[][] scores from the gui/PairwiseAlignPanel + * checks the connections for each sequence with <++>/AlignmentViewport seqs.calculateConnectivity(float[][] scores, int dim) (from analysis/Connectivity) -- throws an Exception if insufficient + * creates a math/MatrixI pairwiseScores of the float[][] scores + * copys the scores and fills the diagonal to create a symmetric matrix using math/Matrix.fillDiagonal() + * performs the analysis/ccAnalysis with the symmetric matrix + * gets the eigenmatrix and the eigenvalues using math/Matrix.tqli() */ @Override public void run() { try { - /** here goes pasimap - * FASTA_to_secureFASTA + sed s/ /_/g ~~ aliases = 1st word of secure fasta header - * FASTA_to_stats ~~ unique_headers_count, unique_bodies_count + check: unique_bodies_min, dim_max - * if unaligned : EMBOSS needlall ~~ alignments.fas - * elseif aligned : MSA_to_pairwiseFASTA - * pairwiseFASTA_to_pairwiseCSV - * pairwise_to_connectivity ~~ connectivity.csv + warn if sequence has #connections >= dim - * pairwiseCSV_to_pairwiseQuantifier (use scoreModel) + label results - * cc_analysis quantifier.ssv ~~ vec.ssv + label results - * plot - * - * ###################### - * - * input AlignemtView seqs ~~ check if aligned - * if not ~~ perform needlall (or try whatever is in there) - * calculate connectivity + check if fine - * quantify alignment with ScoreModelI scoreModel - * cc_analysis with quantification - * plot whatever comes out of cc_analysis - */ - - /* - bool aligned = seqs.checkoridk; - if (!aligned) - { - seqs = needlall(seqs); - } - - connectivity - - pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); - - cc_analysis - */ - // run needleman regardless if aligned or not // gui.PairwiseAlignPanel <++> PairwiseAlignPanel alignment = new PairwiseAlignPanel(seqs); float[][] scores = alignment.getAlignmentScores(); //bigger index first -- eg scores[14][13] - /** TODO scores still not same as in original pasimap */ - System.out.println(scores[8][0]); Hashtable connectivity = seqs.calculateConnectivity(scores, dim); pairwiseScores = new Matrix(scores); - System.out.println(pairwiseScores.getValue(8,0)); pairwiseScores.fillDiagonal(); - System.out.println(pairwiseScores.getValue(8,0)); - /** TODO implement cc_analysis */ - pairwiseScores = ccAnalysis.run(pairwiseScores); - - /** code like in pca, needed for plot */ + ccAnalysis cc = new ccAnalysis(pairwiseScores, dim); + pairwiseScores = cc.run(); tridiagonal = pairwiseScores.copy(); - tridiagonal.tred(); + //tridiagonal.tred(); + + /** perform the eigendecomposition for the plot */ eigenMatrix = tridiagonal.copy(); - eigenMatrix.tqli(); + eigenMatrix.setD(pairwiseScores.getD()); + //eigenMatrix.tqli(); System.out.println(getDetails()); @@ -311,7 +295,7 @@ public class PaSiMap implements Runnable } /** - * Answers the N dimensions of the NxN PaSiMap matrix. This is the number of + * Answers the N dimensions of the NxM PaSiMap matrix. This is the number of * sequences involved in the pairwise score calculation. * * @return @@ -323,6 +307,18 @@ public class PaSiMap implements Runnable } /** + * Answers the M dimensions of the NxM PaSiMap matrix. This is the number of + * sequences involved in the pairwise score calculation. + * + * @return + */ + public int getWidth() + { + // TODO can any of seqs[] be null? + return pairwiseScores.width();// seqs.getSequences().length; + } + + /** * Answers the sequence pairwise similarity scores which were the first step * of the PaSiMap calculation * diff --git a/src/jalview/analysis/ccAnalysis.java b/src/jalview/analysis/ccAnalysis.java new file mode 100755 index 0000000..948aa8f --- /dev/null +++ b/src/jalview/analysis/ccAnalysis.java @@ -0,0 +1,1008 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ + +/* +* Copyright 2018-2022 Kathy Su, Kay Diederichs +* +* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along with this program. If not, see . +*/ + +/** +* Ported from https://doi.org/10.1107/S2059798317000699 by +* @AUTHOR MorellThomas +*/ + +package jalview.analysis; + +import jalview.bin.Console; +import jalview.math.MatrixI; +import jalview.math.Matrix; +import jalview.math.MiscMath; + +import java.lang.Math; +import java.lang.System; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.analysis.MultivariateMatrixFunction; +import org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer; +import org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem; +import org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder; +import org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.SingularValueDecomposition; + +/** + * A class to model rectangular matrices of double values and operations on them + */ +public class ccAnalysis +{ + private byte dim = 0; //dimensions + + private MatrixI scoresOld; //input scores + + public ccAnalysis(MatrixI scores, byte dim) + { + //&! round matrix to .4f to be same as in pasimap + for (int i = 0; i < scores.height(); i++) + { + for (int j = 0; j < scores.width(); j++) + { + if (!Double.isNaN(scores.getValue(i,j))) + { + scores.setValue(i, j, (double) Math.round(scores.getValue(i,j) * (int) 10000) / 10000); + } + } + } + this.scoresOld = scores; + this.dim = dim; + } + + /** TODO + * DOCUMENT ME + * + * @param hSigns ~ hypothesis signs (+/-) for each sequence + * @param scores ~ input score matrix + * + * @return distrustScores + */ + private int[] initialiseDistrusts(byte[] hSigns, MatrixI scores) + { + int[] distrustScores = new int[scores.width()]; + + // loop over symmetric matrix + for (int i = 0; i < scores.width(); i++) + { + byte hASign = hSigns[i]; + int conHypNum = 0; + int proHypNum = 0; + + for (int j = 0; j < scores.width(); j++) + { + double cell = scores.getRow(i)[j]; // value at [i][j] in scores + byte hBSign = hSigns[j]; + if (!Double.isNaN(cell)) + { + byte cellSign = (byte) Math.signum(cell); //check if sign of matrix value fits hyptohesis + if (cellSign == hASign * hBSign) + { + proHypNum++; + } else { + conHypNum++; + } + } + } + distrustScores[i] = conHypNum - proHypNum; //create distrust score for each sequence + } + return distrustScores; + } + + /** TODO + * DOCUMENT ME + * + * @param hSigns ~ hypothesis signs (+/-) + * @param distrustScores + * @param scores ~ input score matrix + * + * @return hSigns + */ + private byte[] optimiseHypothesis(byte[] hSigns, int[] distrustScores, MatrixI scores) + { + // get maximum distrust score + int[] maxes = MiscMath.findMax(distrustScores); + int maxDistrustIndex = maxes[0]; + int maxDistrust = maxes[1]; + + // if hypothesis is not optimal yet + if (maxDistrust > 0) + { + //toggle sign for hI with maximum distrust + hSigns[maxDistrustIndex] *= -1; + // update distrust at same position + distrustScores[maxDistrustIndex] *= -1; + + // also update distrust scores for all hI that were not changed + byte hASign = hSigns[maxDistrustIndex]; + for (int NOTmaxDistrustIndex = 0; NOTmaxDistrustIndex < distrustScores.length; NOTmaxDistrustIndex++) + { + if (NOTmaxDistrustIndex != maxDistrustIndex) + { + byte hBSign = hSigns[NOTmaxDistrustIndex]; + double cell = scores.getRow(maxDistrustIndex)[NOTmaxDistrustIndex]; + + // distrust only changed if not NaN + if (!Double.isNaN(cell)) + { + byte cellSign = (byte) Math.signum(cell); + // if sign of cell matches hypothesis decrease distrust by 2 because 1 more value supporting and 1 less contradicting + // else increase by 2 + if (cellSign == hASign * hBSign) + { + distrustScores[NOTmaxDistrustIndex] -= 2; + } else { + distrustScores[NOTmaxDistrustIndex] += 2; + } + } + } + } + //further optimisation necessary + return optimiseHypothesis(hSigns, distrustScores, scores); + + } else { + return hSigns; + } + } + + /** + * takes the a symmetric MatrixI as input scores which may contain Double.NaN + * approximate the missing values using hypothesis optimisation + * + * runs analysis + * + * @param scores ~ score matrix + * + * @return + */ + public MatrixI run () + { + MatrixI eigenMatrix = scoresOld.copy(); + MatrixI repMatrix = scoresOld.copy(); + try + { + /* + * Calculate correction factor for 2nd and higher eigenvalue(s). + * This correction is NOT needed for the 1st eigenvalue, because the + * unknown (=NaN) values of the matrix are approximated by presuming + * 1-dimensional vectors as the basis of the matrix interpretation as dot + * products. + */ + + //&! debug + System.out.println("input:"); + eigenMatrix.print(System.out, "%1.4f "); + int matrixWidth = eigenMatrix.width(); // square matrix, so width == height + int matrixElementsTotal = (int) Math.pow(matrixWidth, 2); //total number of elemts + + float correctionFactor = (float) (matrixElementsTotal - eigenMatrix.countNaN()) / (float) matrixElementsTotal; + + /* + * Calculate hypothetical value (1-dimensional vector) h_i for each + * dataset by interpreting the given correlation coefficients as scalar + * products. + */ + + /* + * Memory for current hypothesis concerning sign of each h_i. + * List of signs for all h_i in the encoding: + * * 1: positive + * * 0: zero + * * -1: negative + * Initial hypothesis: all signs are positive. + */ + byte[] hSigns = new byte[matrixWidth]; + Arrays.fill(hSigns, (byte) 1); + + //Estimate signs for each h_i by refining hypothesis on signs. + hSigns = optimiseHypothesis(hSigns, initialiseDistrusts(hSigns, eigenMatrix), eigenMatrix); + + + //Estimate absolute values for each h_i by determining sqrt of mean of + //non-NaN absolute values for every row. + //<++> hAbs = //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) + MiscMath.print(eigenMatrix.absolute().meanRow(), "%1.8f"); + double[] hAbs = MiscMath.sqrt(eigenMatrix.absolute().meanRow()); //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) + + //Combine estimated signs with absolute values in obtain total value for + //each h_i. + double[] hValues = MiscMath.elementwiseMultiply(hSigns, hAbs); + //<++>hValues.reshape((1,matrixWidth)); // doesnt it already look like this + + /*Complement symmetric matrix by using the scalar products of estimated + *values of h_i to replace NaN-cells. + *Matrix positions that have estimated values + *(only for diagonal and upper off-diagonal values, due to the symmetry + *the positions of the lower-diagonal values can be inferred). + List of tuples (row_idx, column_idx).*/ + + ArrayList estimatedPositions = new ArrayList(); + + // for off-diagonal cells + for (int rowIndex = 0; rowIndex < matrixWidth - 1; rowIndex++) + { + for (int columnIndex = rowIndex + 1; columnIndex < matrixWidth; columnIndex++) + { + double cell = eigenMatrix.getValue(rowIndex, columnIndex); + if (Double.isNaN(cell)) + { + //calculate scalar product as new cell value + cell = hValues[rowIndex] * hValues[columnIndex]; // something is wrong with hAbs andhValues and everything!!!!!!!!!!!! + //fill in new value in cell and symmetric partner + eigenMatrix.setValue(rowIndex, columnIndex, cell); + eigenMatrix.setValue(columnIndex, rowIndex, cell); + //save positions of estimated values + estimatedPositions.add(new int[]{rowIndex, columnIndex}); + } + } + } + + // for diagonal cells + for (int diagonalIndex = 0; diagonalIndex < matrixWidth; diagonalIndex++) + { + double cell = Math.pow(hValues[diagonalIndex], 2); + eigenMatrix.setValue(diagonalIndex, diagonalIndex, cell); + estimatedPositions.add(new int[]{diagonalIndex, diagonalIndex}); + } + + /*Refine total values of each h_i: + *Initialise h_values of the hypothetical non-existant previous iteration + *with the correct format but with impossible values. + Needed for exit condition of otherwise endless loop.*/ + System.out.print("initial values: [ "); + for (double h : hValues) + { + System.out.print(String.format("%1.4f, ", h)); + } + System.out.println(" ]"); + + + double[] hValuesOld = new double[matrixWidth]; + + int iterationCount = 0; + + // repeat unitl values of h do not significantly change anymore + while (true) + { + for (int hIndex = 0; hIndex < matrixWidth; hIndex++) + { + //@python newH = np.sum(hValues * eigenMatrix[hIndex]) / np.sum(hValues ** 2) + double newH = Arrays.stream(MiscMath.elementwiseMultiply(hValues, eigenMatrix.getRow(hIndex))).sum() / Arrays.stream(MiscMath.elementwiseMultiply(hValues, hValues)).sum(); + hValues[hIndex] = newH; + } + + System.out.print(String.format("iteration %d: [ ", iterationCount)); + for (double h : hValues) + { + System.out.print(String.format("%1.4f, ", h)); + } + System.out.println(" ]"); + + //update values of estimated positions + for (int[] pair : estimatedPositions) // pair ~ row, col + { + double newVal = hValues[pair[0]] * hValues[pair[1]]; + eigenMatrix.setValue(pair[0], pair[1], newVal); + eigenMatrix.setValue(pair[1], pair[0], newVal); + } + + iterationCount++; + + //exit loop as soon as new values are similar to the last iteration + if (MiscMath.allClose(hValues, hValuesOld, 0d, 1e-05d, false)) + { + break; + } + + //save hValues for comparison in the next iteration + System.arraycopy(hValues, 0, hValuesOld, 0, hValues.length); + } + + //----------------------------- + //Use complemented symmetric matrix to calculate final representative + //vectors. + //&! debug + System.out.println("after estimating:"); + eigenMatrix.print(System.out, "%1.8f "); + + //Eigendecomposition. + eigenMatrix.tred(); + System.out.println("tred"); + eigenMatrix.print(System.out, "%8.2f"); + + eigenMatrix.tqli(); + System.out.println("eigenvals"); + eigenMatrix.printD(System.out, "%2.4f "); + System.out.println(); + System.out.println("tqli"); + eigenMatrix.print(System.out, "%8.2f"); + + double[] eigenVals = eigenMatrix.getD(); + + /* + TreeMap eigenPairs = new TreeMap<>(Comparator.reverseOrder()); + for (int i = 0; i < eigenVals.length; i++) + { + eigenPairs.put(eigenVals[i], eigenMatrix.getColumn(i)); + } + */ + TreeMap eigenPairs = new TreeMap<>(Comparator.reverseOrder()); + for (int i = 0; i < eigenVals.length; i++) + { + eigenPairs.put(eigenVals[i], i); + } + + // matrix of representative eigenvectors (each row is a vector) + double[][] _repMatrix = new double[eigenVals.length][dim]; //last ones were dim + double[][] _oldMatrix = new double[eigenVals.length][dim]; + double[] correctedEigenValues = new double[dim]; + + //for (Entry pair : eigenPairs.entrySet()) + int l = 0; + for (Entry pair : eigenPairs.entrySet()) + { + double eigenValue = pair.getKey(); + int column = pair.getValue(); + double[] eigenVector = eigenMatrix.getColumn(column); + //for 2nd and higher eigenvalues + if (l >= 1) + { + eigenValue /= correctionFactor; + } + //l++; + //correctedEigenValues[dim - l] = eigenValue; + correctedEigenValues[l] = eigenValue; + for (int j = 0; j < eigenVector.length; j++) + { + //_repMatrix[j][dim - l] = (eigenValue < 0) ? 0.0 : Math.sqrt(eigenValue) * eigenVector[j]; + _repMatrix[j][l] = (eigenValue < 0) ? 0.0 : - Math.sqrt(eigenValue) * eigenVector[j]; + double tmpOldScore = scoresOld.getColumn(column)[j]; + _oldMatrix[j][dim - l - 1] = (Double.isNaN(tmpOldScore)) ? 0.0 : tmpOldScore; + } + l++; + if (l >= dim) + { + break; + } + } + + System.out.println("correctedEigenValues"); + MiscMath.print(correctedEigenValues, "%2.4f "); + + repMatrix = new Matrix(_repMatrix); + repMatrix.setD(correctedEigenValues); + MatrixI oldMatrix = new Matrix(_oldMatrix); //TODO do i even need it anymore? + + System.out.println("old matrix"); + oldMatrix.print(System.out, "%8.2f"); + + System.out.println("scoresOld"); + scoresOld.print(System.out, "%1.4f "); + + System.out.println("rep matrix"); + repMatrix.print(System.out, "%1.8f "); + + MatrixI dotMatrix = repMatrix.postMultiply(repMatrix.transpose()); + System.out.println("dot matrix"); + dotMatrix.print(System.out, "%1.8f "); + + double rmsd = scoresOld.rmsd(dotMatrix); //TODO do i need this here? + System.out.println(rmsd); + + System.out.println("iteration, rmsd, maxDiff, rmsdDiff"); + System.out.println(String.format("0, %8.5f, -, -", rmsd)); + // Refine representative vectors by minimising sum-of-squared deviates between dotMatrix and original score matrix + for (int iteration = 1; iteration < 21; iteration++) // arbitrarily set to 20 + { + MatrixI repMatrixOLD = repMatrix.copy(); + MatrixI dotMatrixOLD = dotMatrix.copy(); + + // for all rows/hA in the original matrix + for (int hAIndex = 0; hAIndex < oldMatrix.height(); hAIndex++) + { + double[] row = oldMatrix.getRow(hAIndex); + double[] hA = repMatrix.getRow(hAIndex); // inverted + hAIndex = hAIndex; + //find least-squares-solution fo rdifferences between original scores and representative vectors + //--> originalToEquasionSystem(hA, hAIndex, repMatrix, row) --> double[] + System.out.println(String.format("||||||||||||||||||||||||||||||||||||||||||||\nIteration: %d", iteration)); + //repMatrix = new Matrix( new double[][]{{ 0.92894902, -0.25013783, -0.0051076 }, { 0.91955135, -0.25024707, -0.00516568}, { 0.90957348, -0.21717002, -0.14259899}, { 0.90298063, -0.21678816, -0.14697814}, { 0.9157065, -0.04646437, 0.10105454}, { 0.9050301, -0.04689785, 0.18964432}, { 0.92498545, 0.03881933, 0.14771523}, { 0.88008842, 0.0142395, 0.11356242}, { 0.94276528, 0.25591474, -0.07190911}, { 0.93939976, 0.23375136, -0.07530558}, { 0.93550782, 0.24635804, -0.07317563}, { 0.92497731, 0.21729928, -0.02361162}}); + //scoresOld = new Matrix( new double[][]{{ Double.NaN, 0.9914, 0.8879, 0.8803, 0.8528, 0.8481, 0.8434, 0.8174, 0.8174, 0.8232, 0.8148, 0.8114}, {0.9914, Double.NaN, 0.8792, 0.8717, 0.8441, 0.8395, 0.8347, 0.8087, 0.8085, 0.8143, 0.8059, 0.8024}, {0.8879, 0.8792, Double.NaN, 0.9578, 0.8289, 0.8075, 0.8268, 0.7815, 0.8165, 0.8168, 0.8084, 0.7974}, {0.8803, 0.8717, 0.9578, Double.NaN, 0.838, 0.7974, 0.8058, 0.7798, 0.8094, 0.8098, 0.8067, 0.7905}, {0.8528, 0.8441, 0.8289, 0.838, Double.NaN, 0.8954, 0.8412, 0.7879, 0.8418, 0.8384, 0.8389, 0.8556}, {0.8481, 0.8395, 0.8075, 0.7974, 0.8954, Double.NaN, 0.8699, 0.8106, 0.8267, 0.8234, 0.8222, 0.8241}, {0.8434, 0.8347, 0.8268, 0.8058, 0.8412, 0.8699, Double.NaN, 0.869, 0.8745, 0.8583, 0.8661, 0.8593}, {0.8174, 0.8087, 0.7815, 0.7798, 0.7879, 0.8106, 0.869, Double.NaN, 0.8273, 0.8331, 0.8137, 0.8029}, {0.8174, 0.8085, 0.8165, 0.8094, 0.8418, 0.8267, 0.8745, 0.8273, Double.NaN, 0.967, 0.978, 0.9373}, {0.8232, 0.8143, 0.8168, 0.8098, 0.8384, 0.8234, 0.8583, 0.8331, 0.967, Double.NaN, 0.9561, 0.9337}, {0.8148, 0.8059, 0.8084, 0.8067, 0.8389, 0.8222, 0.8661, 0.8137, 0.978, 0.9561, Double.NaN, 0.9263}, {0.8114, 0.8024, 0.7974, 0.7905, 0.8556, 0.8241, 0.8593, 0.8029, 0.9373, 0.9337, 0.9263, Double.NaN}}); + double[] hAlsm = leastSquaresOptimisation(repMatrix, scoresOld, hAIndex); + // update repMatrix with new hAlsm + for (int j = 0; j < repMatrix.width(); j++) + { + repMatrix.setValue(hAIndex, j, hAlsm[j]); + } + break; + } + + // dot product of representative vecotrs yields a matrix with values approximating the correlation matrix + dotMatrix = repMatrix.postMultiply(repMatrix.transpose()); + // calculate rmsd between approximation and correlation matrix + rmsd = scoresOld.rmsd(dotMatrix); + + // calculate maximum change of representative vectors of current iteration + //repMatrix.subtract(repMatrixOLD).print(System.out, "%8.2f"); + MatrixI diff = repMatrix.subtract(repMatrixOLD).absolute(); + double maxDiff = 0.0; + for (int i = 0; i < diff.height(); i++) + { + for (int j = 0; j < diff.width(); j++) + { + maxDiff = (diff.getValue(i, j) > maxDiff) ? diff.getValue(i, j) : maxDiff; + } + } + System.out.println(String.format("maxDiff: %f", maxDiff)); + + // calculate rmsd between current and previous estimation + double rmsdDiff = dotMatrix.rmsd(dotMatrixOLD); + + System.out.println(String.format("%d, %8.5f, %8.5f, %8.5f", iteration, rmsd, maxDiff, rmsdDiff)); + + if (!(Math.abs(maxDiff) > 1e-06)) + { + repMatrix = repMatrixOLD.copy(); + break; + } + break; + } + + + } catch (Exception q) + { + Console.error("Error computing cc_analysis: " + q.getMessage()); + q.printStackTrace(); + } + //repMatrix = repMatrix.transpose(); + System.out.println("final repMatrix"); + repMatrix.print(System.out, "%8.2f"); + return repMatrix; + } + + /** + * Create equations system using information on originally known + * pairwise correlation coefficients (parsed from infile) and the + * representative result vectors + * + * Each equation has the format: + * hA * hA - pairwiseCC = 0 + * with: + * hA: unknown variable + * hB: known representative vector + * pairwiseCC: known pairwise correlation coefficien + * + * The resulting equations system is overdetermined, if there are more + * equations than unknown elements + * + * @param x ~ unknown n-dimensional column-vector + * (needed for generating equations system, NOT to be specified by user). + * @param hAIndex ~ index of currently optimised representative result vector. + * @param h ~ matrix with row-wise listing of representative result vectors. + * @param originalRow ~ matrix-row of originally parsed pairwise correlation coefficients. + * + * @return + */ + //private double[] originalToEquasionSystem(double[] x, int hAIndex, MatrixI h, MatrixI originalScores) + private double[] originalToEquasionSystem(double[] hA, MatrixI repMatrix, MatrixI scoresOld, int hAIndex) + { + double[] originalRow = scoresOld.getRow(hAIndex); + int nans = MiscMath.countNaN(originalRow); + double[] result = new double[originalRow.length - nans]; + + //for all pairwiseCC in originalRow + int resultIndex = 0; + for (int hBIndex = 0; hBIndex < originalRow.length; hBIndex++) + { + double pairwiseCC = originalRow[hBIndex]; + // if not NaN -> create new equation and add it to the system + if (!Double.isNaN(pairwiseCC)) + { + double[] hB = repMatrix.getRow(hBIndex); + result[resultIndex++] = MiscMath.sum(MiscMath.elementwiseMultiply(hA, hB)) - pairwiseCC; + } else { + } + } + return result; + } + + /** + * returns the jacobian matrix + * @param repMatrix ~ matrix of representative vectors + * @param hAIndex ~ current row index + * + * @return + */ + private MatrixI approximateDerivative(MatrixI repMatrix, MatrixI scoresOld, int hAIndex) + { + //hA = x0 + double[] hA = repMatrix.getRow(hAIndex); + double[] f0 = originalToEquasionSystem(hA, repMatrix, scoresOld, hAIndex); + System.out.println("Approximate derivative with "); + System.out.print("hA (x): "); + MiscMath.print(hA, "%1.8f"); + System.out.print("f0: "); + MiscMath.print(f0, "%1.8f"); + double[] signX0 = new double[hA.length]; + double[] xAbs = new double[hA.length]; + for (int i = 0; i < hA.length; i++) + { + signX0[i] = (hA[i] >= 0) ? 1 : -1; + xAbs[i] = (Math.abs(hA[i]) >= 1.0) ? Math.abs(hA[i]) : 1.0; + } + double rstep = Math.pow(Math.ulp(1.0), 0.5); + + double[] h = new double [hA.length]; + for (int i = 0; i < hA.length; i++) + { + h[i] = rstep * signX0[i] * xAbs[i]; + } + + int m = f0.length; + int n = hA.length; + double[][] jTransposed = new double[n][m]; + for (int i = 0; i < h.length; i++) + { + double[] x = new double[h.length]; + System.arraycopy(hA, 0, x, 0, h.length); + x[i] += h[i]; + double dx = x[i] - hA[i]; + double[] df = originalToEquasionSystem(x, repMatrix, scoresOld, hAIndex); + for (int j = 0; j < df.length; j++) + { + df[j] -= f0[j]; + jTransposed[i][j] = df[j] / dx; + } + } + MatrixI J = new Matrix(jTransposed).transpose(); // inverted + return J; + } + + /** + * norm of regularized (by alpha) least-squares solution minus Delta + * @param alpha + * @param suf + * @param s + * @param Delta + * + * @return + */ + private double[] phiAndDerivative(double alpha, double[] suf, double[] s, double Delta) + { + double[] denom = MiscMath.elementwiseAdd(MiscMath.elementwiseMultiply(s, s), alpha); + double pNorm = MiscMath.norm(MiscMath.elementwiseDivide(suf, denom)); + double phi = pNorm - Delta; + // - sum ( suf**2 / denom**3) / pNorm + double phiPrime = - MiscMath.sum(MiscMath.elementwiseDivide(MiscMath.elementwiseMultiply(suf, suf), MiscMath.elementwiseMultiply(MiscMath.elementwiseMultiply(denom, denom), denom))) / pNorm; + return new double[]{phi, phiPrime}; + } + + /** + * class holding the result of solveLsqTrustRegion + */ + private class TrustRegion + { + private double[] step; + private double alpha; + private int iteration; + + public TrustRegion(double[] step, double alpha, int iteration) + { + this.step = step; + this.alpha = alpha; + this.iteration = iteration; + } + + public double[] getStep() + { + return this.step; + } + + public double getAlpha() + { + return this.alpha; + } + + public int getIteration() + { + return this.iteration; + } + } + + /** + * solve a trust-region problem arising in least-squares optimisation + * @param n ~ number of variables + * @param m ~ number of residuals + * @param uf ~ <++> + * @param s ~ singular values of J + * @param V ~ transpose of VT + * @param Delta ~ radius of a trust region + * @param alpha ~ initial guess for alpha + * + * @return + */ + private TrustRegion solveLsqTrustRegion(int n, int m, double[] uf, double[] s, MatrixI V, double Delta, double alpha) + { + double[] suf = MiscMath.elementwiseMultiply(s, uf); + + //check if J has full rank and tr Gauss-Newton step + boolean fullRank = false; + if (m >= n) + { + double threshold = s[0] * Math.ulp(1.0) * m; + fullRank = s[s.length - 1] > threshold; + } + if (fullRank) + { + double[] p = MiscMath.elementwiseMultiply(V.sumProduct(MiscMath.elementwiseDivide(uf, s)), -1); // inverted and roughly fine + if (MiscMath.norm(p) <= Delta) + { + TrustRegion result = new TrustRegion(p, 0.0, 0); + return result; + } + } + + double alphaUpper = MiscMath.norm(suf) / Delta; + double alphaLower = 0.0; + if (fullRank) + { + double[] phiAndPrime = phiAndDerivative(0.0, suf, s, Delta); + alphaLower = - phiAndPrime[0] / phiAndPrime[1]; + } + + alpha = (!fullRank && alpha == 0.0) ? alpha = Math.max(0.001 * alphaUpper, Math.pow(alphaLower * alphaUpper, 0.5)) : alpha; + + int iteration = 0; + while (iteration < 10) // 10 is default max_iter + { + alpha = (alpha < alphaLower || alpha > alphaUpper) ? alpha = Math.max(0.001 * alphaUpper, Math.pow(alphaLower * alphaUpper, 0.5)) : alpha; + double[] phiAndPrime = phiAndDerivative(alpha, suf, s, Delta); + double phi = phiAndPrime[0]; + double phiPrime = phiAndPrime[1]; + + alphaUpper = (phi < 0) ? alpha : alphaUpper; + double ratio = phi / phiPrime; + alphaLower = Math.max(alphaLower, alpha - ratio); + alpha -= (phi + Delta) * ratio / Delta; + + if (Math.abs(phi) < 0.01 * Delta) // default rtol set to 0.01 + { + break; + } + iteration++; + } + + // p = - V.dot( suf / (s**2 + alpha)) + double[] tmp = MiscMath.elementwiseDivide(suf, MiscMath.elementwiseAdd(MiscMath.elementwiseMultiply(s, s), alpha)); + double[] p = MiscMath.elementwiseMultiply(V.sumProduct(tmp), -1); + + // Make the norm of p equal to Delta, p is changed only slightly during this. + // It is done to prevent p lie outside of the trust region + p = MiscMath.elementwiseMultiply(p, Delta / MiscMath.norm(p)); + + TrustRegion result = new TrustRegion(p, alpha, iteration + 1); + return result; + } + + /** + * compute values of a quadratic function arising in least squares + * function: 0.5 * s.T * (J.T * J + diag) * s + g.T * s + * + * @param J ~ jacobian matrix + * @param g ~ gradient + * @param s ~ steps and rows + * + * @return + */ + private double evaluateQuadratic(MatrixI J, double[] g, double[] s) + { + + //TODO s (-> stepH) is slightly different + double[] Js = J.sumProduct(s); //TODO completely wromg + double q = MiscMath.dot(Js, Js); + double l = MiscMath.dot(s, g); + + /* + System.out.println("doing evaluateQuadratic"); + System.out.println("inputs"); + System.out.print("J"); + J.print(System.out, "%f "); + System.out.print("g"); + MiscMath.print(g, "%f"); + System.out.print("s"); + MiscMath.print(s, "%f"); + System.out.print("\nJs"); + MiscMath.print(Js, "%f"); + System.out.println(String.format("0.5 * %f + %f", q, l)); + */ + + return 0.5 * q + l; + } + + /** + * update the radius of a trust region based on the cost reduction + * + * @param Delta + * @param actualReduction + * @param predictedReduction + * @param stepNorm + * @param boundHit + * + * @return + */ + private double[] updateTrustRegionRadius(double Delta, double actualReduction, double predictedReduction, double stepNorm, boolean boundHit) + { + double ratio = 0; + if (predictedReduction > 0) + { + ratio = actualReduction / predictedReduction; + } else if (predictedReduction == 0 && actualReduction == 0) { + ratio = 1; + } else { + ratio = 0; + } + + if (ratio < 0.25) + { + Delta = 0.25 * stepNorm; + } else if (ratio > 0.75 && boundHit) { + Delta *= 2.0; + } + + return new double[]{Delta, ratio}; + } + + /** + * check the termination condition for nonlinear least squares + * + * @param actualReduction + * @param cost + * @param stepNorm + * @param xNorm + * @param ratio + * + * @return + */ + private byte checkTermination(double actualReduction, double cost, double stepNorm, double xNorm, double ratio) + { + // default ftol and xtol = 1e-8 + boolean ftolSatisfied = actualReduction < (1e-8 * cost) && ratio > 0.25; + boolean xtolSatisfied = stepNorm < (1e-8 * (1e-8 + xNorm)); + + if (ftolSatisfied && xtolSatisfied) + { + return (byte) 4; + } else if (ftolSatisfied) { + return (byte) 2; + } else if (xtolSatisfied) { + return (byte) 3; + } else { + return (byte) 0; + } + } + + /** + * TODO DOCUMENT ME! + * @param repMatrix ~ Matrix containing representative vectors + * @param scoresOld ~ Matrix containing initial observations + * @param index ~ current row index + * @param J ~ jacobian matrix + * + * @return + */ + private double[] trf(MatrixI repMatrix, MatrixI scoresOld, int index, MatrixI J) + { + System.out.println("-----------------\nStart of trf"); + //hA = x0 + double[] hA = repMatrix.getRow(index); //inverted + double[] f0 = originalToEquasionSystem(hA, repMatrix, scoresOld, index); + int nfev = 1; // ?? + int njev = 1; // ?? + int m = J.height(); + int n = J.width(); + double cost = 0.5 * MiscMath.dot(f0, f0); + double[] g = J.transpose().sumProduct(f0); // inverted + double[] scale = new double[hA.length]; + Arrays.fill(scale, 1); // ?? + double Delta = MiscMath.norm(hA); + int maxNfev = hA.length * 100; // ?? + double alpha = 0.0; // ?? "Levenberg-Marquardt" parameter + + System.out.println("Checking initial values:"); + System.out.print("hA (x): "); + MiscMath.print(hA, "%1.8f"); + System.out.print("f0: "); + MiscMath.print(f0, "%1.8f"); + //System.out.println(String.format("nfev: %d, njev: %d, maxNfev: %d", nfev, njev, maxNfev)); + //System.out.println(String.format("m: %d, n: %d", m, n)); + System.out.println(String.format("cost: %1.8f, Delta: %1.8f, alpha: %1.8f", cost, Delta, alpha)); + System.out.print("g: "); + MiscMath.print(g, "%1.8f"); + + double gNorm = 0; + byte terminationStatus = 0; + int iteration = 0; + + System.out.println("outer while loop starts"); + while (true) + { + System.out.println(String.format("iteration: %d", iteration)); + + gNorm = MiscMath.norm(g); + if (terminationStatus != 0 || nfev == maxNfev) + { + System.out.println(String.format("outer loop broken with terminationStatus: %d and nfev: %d", terminationStatus, nfev)); + break; + } + // d = scale + SingularValueDecomposition svd = new SingularValueDecomposition(new Array2DRowRealMatrix(J.asArray())); + // svd not 100% correct -> origin of problems + MatrixI U = new Matrix(svd.getU().getData()); // inverted + double[] s = svd.getSingularValues(); // ?? + MatrixI V = new Matrix(svd.getV().getData()).transpose(); //TODO inverted origin of probelm + double[] uf = U.transpose().sumProduct(f0); + + System.out.println("After SVD"); + System.out.println(String.format("gNorm: %1.8f", gNorm)); + System.out.print("U: "); + U.print(System.out, "%1.8f "); + System.out.print("s: "); + MiscMath.print(s, "%1.8f"); + System.out.print("V: "); + V.print(System.out, "%1.8f "); + System.out.print("uf: "); + MiscMath.print(uf, "%1.8f"); + + double actualReduction = -1; + double[] xNew = new double[hA.length]; + double[] fNew = new double[f0.length]; + double costNew = 0; + double stepHnorm = 0; + + System.out.println("Inner while loop starts"); + + while (actualReduction <= 0 && nfev < maxNfev) + { + TrustRegion trustRegion = solveLsqTrustRegion(n, m, uf, s, V, Delta, alpha); + double[] stepH = trustRegion.getStep(); + alpha = trustRegion.getAlpha(); + int nIterations = trustRegion.getIteration(); + double predictedReduction = - (evaluateQuadratic(J, g, stepH)); + + xNew = MiscMath.elementwiseAdd(hA, stepH); + fNew = originalToEquasionSystem(xNew, repMatrix, scoresOld, index); + nfev++; + + stepHnorm = MiscMath.norm(stepH); + + System.out.println("After TrustRegion"); + System.out.print("stepH: "); + MiscMath.print(stepH, "%1.8f "); + System.out.println(String.format("alpha: %1.8f, nIterations: %d, predictedReduction: %1.8f, nfev: %d, stepHnorm: %1.8f", alpha, nIterations, predictedReduction, nfev, stepHnorm)); + System.out.print("xNew: "); + MiscMath.print(xNew, "%1.8f "); + System.out.print("fNew: "); + MiscMath.print(fNew, "%1.8f "); + + if (MiscMath.countNaN(fNew) > 0) + { + Delta = 0.25 * stepHnorm; + System.out.println(String.format("Loop continued with %d NaNs and Delta: %1.8f", MiscMath.countNaN(fNew), Delta)); + continue; + } + + // usual trust-region step quality estimation + costNew = 0.5 * MiscMath.dot(fNew, fNew); + actualReduction = cost - costNew; + + double[] updatedTrustRegion = updateTrustRegionRadius(Delta, actualReduction, predictedReduction, stepHnorm, stepHnorm > (0.95 * Delta)); + double DeltaNew = updatedTrustRegion[0]; + double ratio = updatedTrustRegion[1]; + + terminationStatus = checkTermination(actualReduction, cost, stepHnorm, MiscMath.norm(hA), ratio); + if (terminationStatus != 0) + { + break; + } + + alpha *= Delta / DeltaNew; + Delta = DeltaNew; + + System.out.println(String.format("actualReduction: %1.8f, alpha: %1.8f, Delta: %1.8f, termination_status: %d, cost_new: %1.8f", actualReduction, alpha, Delta, terminationStatus, costNew)); + //break; + } + System.out.println(String.format("actualReduction before check: %1.8f", actualReduction)); + if (actualReduction > 0) + { + hA = xNew; + f0 = fNew; + cost = costNew; + + J = approximateDerivative(repMatrix, scoresOld, index); + System.out.println("J in the end"); + J.print(System.out, "%1.8f "); + njev++; + + g = J.transpose().sumProduct(f0); + } else { + stepHnorm = 0; + actualReduction = 0; + } + iteration++; + } + + System.out.println("into OptimizeResult"); + System.out.println("x (hA)"); + MiscMath.print(hA, "%1.8f"); + System.out.println(String.format("cost: %1.8f", cost)); + System.out.println("f0"); + MiscMath.print(f0, "%1.8f"); + System.out.println("J"); + J.print(System.out, "%1.8f "); + System.out.println("g"); + MiscMath.print(g, "%1.8f"); + System.out.println(String.format("gNorm: %1.8f", gNorm)); + System.out.println(String.format("nfev: %d", nfev)); + System.out.println(String.format("njev: %d", njev)); + System.out.println(String.format("terminationStatus: %d", terminationStatus)); + // OptimizeResult(x, cost, f0, J, g, gNorm, 0 in shape x, nfev, njev, terminationStatus) + return hA; + } + + /** + * TODO DOCUMENT ME! + * @param repMatrix ~ Matrix containing representative vectors + * @param scoresOld ~ Matrix containing initial observations + * @param index ~ current row index + * + * @return + */ + private double[] leastSquaresOptimisation(MatrixI repMatrix, MatrixI scoresOld, int index) + { + System.out.println("lsq starts!!!"); + MatrixI J = approximateDerivative(repMatrix, scoresOld, index); + System.out.println("J"); + J.print(System.out, "%1.8f "); + double[] result = trf(repMatrix, scoresOld, index, J); + return result; + } + +} diff --git a/src/jalview/math/Matrix.java b/src/jalview/math/Matrix.java index 5eaa7c3..7870c14 100755 --- a/src/jalview/math/Matrix.java +++ b/src/jalview/math/Matrix.java @@ -191,7 +191,10 @@ public class Matrix implements MatrixI */ for (int k = 0; k < in.width(); k++) { - tmp[i][j] += (in.getValue(i, k) * this.value[k][j]); + if (!Double.isNaN(in.getValue(i,k)) && !Double.isNaN(this.value[k][j])) + { + tmp[i][j] += (in.getValue(i, k) * this.value[k][j]); + } } } } @@ -830,12 +833,24 @@ public class Matrix implements MatrixI } /** + * returns the matrix as a double[][] array + * + * @return + */ + @Override + public double[][] asArray() + { + return value; + } + + /** * Returns an array containing the values in the specified column * * @param col * * @return */ + @Override public double[] getColumn(int col) { double[] out = new double[rows]; @@ -858,7 +873,7 @@ public class Matrix implements MatrixI @Override public void printD(PrintStream ps, String format) { - for (int j = 0; j < rows; j++) + for (int j = 0; j < d.length; j++) { Format.print(ps, format, d[j]); } @@ -1000,6 +1015,26 @@ public class Matrix implements MatrixI } } + /** + * Add d to all entries of this matrix + * + * @param d ~ value to add + */ + @Override + public void add(double d) + { + for (double[] row : value) + { + if (row != null) + { + for (int i = 0; i < row.length; i++) + { + row[i] += d; + } + } + } + } + @Override public void setD(double[] v) { @@ -1053,6 +1088,7 @@ public class Matrix implements MatrixI /** * Returns a copy in which every value in the matrix is its absolute * + * @return */ @Override public MatrixI absolute() @@ -1075,6 +1111,7 @@ public class Matrix implements MatrixI /** * Returns the mean of each row * + * @return */ @Override public double[] meanRow() @@ -1094,6 +1131,7 @@ public class Matrix implements MatrixI /** * Returns the mean of each column * + * @return */ @Override public double[] meanCol() @@ -1111,8 +1149,54 @@ public class Matrix implements MatrixI } /** + * return a flattened matrix containing the sum of each column + * + * @return + */ + @Override + public double[] sumCol() + { + double[] sum = new double[cols]; + for (int j = 0; j < cols; j++) + { + double[] column = getColumn(j); + if (column != null) + { + sum[j] = MiscMath.sum(column); + } + } + return sum; + } + + /** + * returns the mean value of the complete matrix + * + * @return + */ + @Override + public double mean() + { + double sum = 0; + int nanCount = 0; + for (double[] row : value) + { + for (double col : row) + { + if (!Double.isNaN(col)) + { + sum += col; + } else { + nanCount++; + } + } + } + return sum / (double) (this.rows * this.cols - nanCount); + } + + /** * fills up a diagonal matrix with its transposed copy * !other side should be filled with 0 + * !keeps Double.NaN found in either side * * TODO check on which side it was diagonal and only do calculations for the other side */ @@ -1121,8 +1205,8 @@ public class Matrix implements MatrixI { int n = this.rows; int m = this.cols; - MatrixI copy = this.transpose(); - for (int i = 0; i < n; i++) + MatrixI copy = this.transpose(); // goes through each element in the matrix and + for (int i = 0; i < n; i++) // adds the value in the transposed copy to the original value { for (int j = 0; j < m; j++) { @@ -1156,4 +1240,179 @@ public class Matrix implements MatrixI return NaN; } + /** + * performs an element-wise addition of this matrix by another matrix ~ this - m + * @param m ~ other matrix + * + * @return + */ + @Override + public MatrixI add(MatrixI m) + { + if (m.width() != cols || m.height() != rows) + { + throw new IllegalArgumentException("Can't add a " + m.height() + "x" + m.width() + " to a " + this.rows + "x" + this.cols + " matrix"); + } + double[][] tmp = new double[this.rows][this.cols]; + for (int i = 0; i < this.rows; i++) + { + for (int j = 0; j < this.cols; j++) + { + tmp[i][j] = this.getValue(i,j) + m.getValue(i,j); + } + } + return new Matrix(tmp); + } + + /** + * performs an element-wise subtraction of this matrix by another matrix ~ this - m + * @param m ~ other matrix + * + * @return + */ + @Override + public MatrixI subtract(MatrixI m) + { + if (m.width() != cols || m.height() != rows) + { + throw new IllegalArgumentException("Can't subtract a " + m.height() + "x" + m.width() + " from a " + this.rows + "x" + this.cols + " matrix"); + } + double[][] tmp = new double[this.rows][this.cols]; + for (int i = 0; i < this.rows; i++) + { + for (int j = 0; j < this.cols; j++) + { + tmp[i][j] = this.getValue(i,j) - m.getValue(i,j); + } + } + return new Matrix(tmp); + } + + /** + * performs an element-wise multiplication of this matrix by another matrix ~ this * m + * @param m ~ other matrix + * + * @return + */ + @Override + public MatrixI elementwiseMultiply(MatrixI m) + { + if (m.width() != cols || m.height() != rows) + { + throw new IllegalArgumentException("Can't multiply a " + this.rows + "x" + this.cols + " by a " + m.height() + "x" + m.width() + " matrix"); + } + double[][] tmp = new double[this.rows][this.cols]; + for (int i = 0; i < this.rows; i++) + { + for (int j = 0; j < this.cols; j++) + { + tmp[i][j] = this.getValue(i, j) * m.getValue(i,j); + } + } + return new Matrix(tmp); + } + + /** + * performs an element-wise division of this matrix by another matrix ~ this / m + * @param m ~ other matrix + * + * @return + */ + @Override + public MatrixI elementwiseDivide(MatrixI m) + { + if (m.width() != cols || m.height() != rows) + { + throw new IllegalArgumentException("Can't divide a " + this.rows + "x" + this.cols + " by a " + m.height() + "x" + m.width() + " matrix"); + } + double[][] tmp = new double[this.rows][this.cols]; + for (int i = 0; i < this.rows; i++) + { + for (int j = 0; j < this.cols; j++) + { + tmp[i][j] = this.getValue(i, j) / m.getValue(i,j); + } + } + return new Matrix(tmp); + } + + /** + * calculate the root-mean-square for tow matrices + * @param m ~ other matrix + * + * @return + */ + @Override + public double rmsd(MatrixI m) + { + MatrixI squaredDeviates = this.subtract(m); + squaredDeviates = squaredDeviates.preMultiply(squaredDeviates); + return Math.sqrt(squaredDeviates.mean()); + } + + /** + * calculates the Frobenius norm of this matrix + * + * @return + */ + @Override + public double norm() + { + double result = 0; + for (double[] row : value) + { + for (double val : row) + { + result += Math.pow(val, 2); + } + } + return Math.sqrt(result); + } + + /** + * returns the sum of all values in this matrix + * + * @return + */ + @Override + public double sum() + { + double sum = 0; + for (double[] row : value) + { + for (double val : row) + { + sum += (Double.isNaN(val)) ? 0.0 : val; + } + } + return sum; + } + + /** + * returns the sum-product of this matrix with vector v + * @param v ~ vector + * + * @return + */ + @Override + public double[] sumProduct(double[] v) + { + if (v.length != cols) + { + throw new IllegalArgumentException("Vector and matrix do not have the same dimension! (" + v.length + " != " + cols + ")"); + } + double[] result = new double[rows]; + for (int i = 0; i < rows; i++) + { + double[] row = value[i]; + double sum = 0; + for (int j = 0; j < row.length; j++) + { + sum += row[j] * v[j]; + } + result[i] = sum; + } + return result; + } + } diff --git a/src/jalview/math/MatrixI.java b/src/jalview/math/MatrixI.java index 61d73dc..a42c75c 100644 --- a/src/jalview/math/MatrixI.java +++ b/src/jalview/math/MatrixI.java @@ -61,6 +61,13 @@ public interface MatrixI void setValue(int i, int j, double d); /** + * Returns the matrix as a double[][] array + * + * @return + */ + double[][] asArray(); + + /** * Answers a copy of the values in the i'th row * * @return @@ -68,6 +75,13 @@ public interface MatrixI double[] getRow(int i); /** + * Answers a copy of the values in the i'th column + * + * @return + */ + double[] getColumn(int i); + + /** * Answers a new matrix with a copy of the values in this one * * @return @@ -161,6 +175,13 @@ public interface MatrixI void multiply(double d); /** + * Add d to all entries of this matrix + * + * @param d ~ value to add + */ + void add(double d); + + /** * Answers true if the two matrices have the same dimensions, and * corresponding values all differ by no more than delta (which should be a * positive value), else false @@ -187,6 +208,18 @@ public interface MatrixI double[] meanCol(); /** + * Returns a flattened matrix containing the sum of each column + * + * @return + */ + double[] sumCol(); + + /** + * returns the mean value of the complete matrix + */ + double mean(); + + /** * fills up a diagonal matrix with its transposed copy * !other side should be filled with either 0 or Double.NaN */ @@ -198,4 +231,80 @@ public interface MatrixI * @return */ int countNaN(); + + /** + * performs an element-wise addition of this matrix by another matrix + * !matrices have to be the same size + * @param m ~ other matrix + * + * @return + * @throws IllegalArgumentException + * if this and m do not have the same dimensions + */ + MatrixI add(MatrixI m); + + /** + * performs an element-wise subtraction of this matrix by another matrix + * !matrices have to be the same size + * @param m ~ other matrix + * + * @return + * @throws IllegalArgumentException + * if this and m do not have the same dimensions + */ + MatrixI subtract(MatrixI m); + + /** + * performs an element-wise multiplication of this matrix by another matrix ~ this * m + * !matrices have to be the same size + * @param m ~ other matrix + * + * @return + * @throws IllegalArgumentException + * if this and m do not have the same dimensions + */ + MatrixI elementwiseMultiply(MatrixI m); + + /** + * performs an element-wise division of this matrix by another matrix ~ this / m + * !matrices have to be the same size + * @param m ~ other matrix + * + * @return + * @throws IllegalArgumentException + * if this and m do not have the same dimensions + */ + MatrixI elementwiseDivide(MatrixI m); + + /** + * calculates the root-mean-square for two matrices + * @param m ~ other matrix + * + * @return + */ + double rmsd(MatrixI m); + + /** + * calculates the Frobenius norm of this matrix + * + * @return + */ + double norm(); + + /** + * returns the sum of all values in this matrix + * + * @return + */ + double sum(); + + /** + * returns the sum-product of this matrix with vector v + * @param v ~ vector + * + * @return + * @throws IllegalArgumentException + * if this.cols and v do not have the same length + */ + double[] sumProduct(double[] v); } diff --git a/src/jalview/math/MiscMath.java b/src/jalview/math/MiscMath.java index 4fde68a..d3cec30 100755 --- a/src/jalview/math/MiscMath.java +++ b/src/jalview/math/MiscMath.java @@ -20,6 +20,8 @@ */ package jalview.math; +import jalview.util.Format; + import java.lang.Math; import java.util.Arrays; @@ -30,23 +32,58 @@ import java.util.Arrays; public class MiscMath { /** + * prints an array + * @param m ~ array + */ + public static void print(double[] m, String format) + { + System.out.print("[ "); + for (double a : m) + { + Format.print(System.out, format + " ", a); + } + System.out.println("]"); + } + + /** * calculates the mean of an array * * @param m ~ array * @return - * TODO whats with nans?? */ public static double mean(double[] m) { double sum = 0; + int nanCount = 0; for (int i = 0; i < m.length; i++) { - if (!Double.isNaN(m[i])) + if (!Double.isNaN(m[i])) // ignore NaN values in the array { sum += m[i]; + } else { + nanCount++; } } - return sum / m.length; + return sum / (double) (m.length - nanCount); + } + + /** + * calculates the sum of an array + * + * @param m ~ array + * @return + */ + public static double sum(double[] m) + { + double sum = 0; + for (int i = 0; i < m.length; i++) + { + if (!Double.isNaN(m[i])) // ignore NaN values in the array + { + sum += m[i]; + } + } + return sum; } /** @@ -69,7 +106,7 @@ public class MiscMath } /** - * calculate element wise multiplication of two arrays + * calculate element wise multiplication of two arrays with the same length * * @param a ~ array * @param b ~ array @@ -78,7 +115,7 @@ public class MiscMath */ public static double[] elementwiseMultiply(byte[] a, double[] b) throws RuntimeException { - if (a.length != b.length) + if (a.length != b.length) // throw exception if the arrays do not have the same length { throw new SameLengthException(a.length, b.length); } @@ -91,7 +128,7 @@ public class MiscMath } public static double[] elementwiseMultiply(double[] a, double[] b) throws RuntimeException { - if (a.length != b.length) + if (a.length != b.length) // throw exception if the arrays do not have the same length { throw new SameLengthException(a.length, b.length); } @@ -104,7 +141,7 @@ public class MiscMath } public static byte[] elementwiseMultiply(byte[] a, byte[] b) throws RuntimeException { - if (a.length != b.length) + if (a.length != b.length) // throw exception if the arrays do not have the same length { throw new SameLengthException(a.length, b.length); } @@ -115,9 +152,18 @@ public class MiscMath } return result; } + public static double[] elementwiseMultiply(double[] a, double b) + { + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = a[i] * b; + } + return result; + } /** - * calculate element wise division of two arrays + * calculate element wise division of two arrays ~ a / b * * @param a ~ array * @param b ~ array @@ -126,7 +172,7 @@ public class MiscMath */ public static double[] elementwiseDivide(double[] a, double[] b) throws RuntimeException { - if (a.length != b.length) + if (a.length != b.length) // throw exception if the arrays do not have the same length { throw new SameLengthException(a.length, b.length); } @@ -139,6 +185,38 @@ public class MiscMath } /** + * calculate element wise addition of two arrays + * + * @param a ~ array + * @param b ~ array + * + * @return + */ + public static double[] elementwiseAdd(double[] a, double[] b) throws RuntimeException + { + if (a.length != b.length) // throw exception if the arrays do not have the same length + { + throw new SameLengthException(a.length, b.length); + } + double[] result = new double[a.length]; + + for (int i = 0; i < a.length; i++) + { + result[i] += a[i] + b[i]; + } + return result; + } + public static double[] elementwiseAdd(double[] a, double b) + { + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) + { + result[i] = a[i] + b; + } + return result; + } + + /** * returns true if two arrays are element wise within a tolerance * * @param a ~ array @@ -154,11 +232,11 @@ public class MiscMath boolean areEqual = true; for (int i = 0; i < a.length; i++) { - if (equalNAN && (Double.isNaN(a[i]) && Double.isNaN(b[i]))) + if (equalNAN && (Double.isNaN(a[i]) && Double.isNaN(b[i]))) // if equalNAN == true -> skip the NaN pair { continue; } - if (Math.abs(a[i] - b[i]) > (atol + rtol * Math.abs(b[i]))) + if (Math.abs(a[i] - b[i]) > (atol + rtol * Math.abs(b[i]))) // check for the similarity condition -> if not met -> break and return false { areEqual = false; break; @@ -170,9 +248,9 @@ public class MiscMath /** * returns the index of the maximum and the maximum value of an array * - * @pararm a ~ array + * @param a ~ array * - * @returns + * @return */ public static int[] findMax(int[] a) { @@ -188,4 +266,62 @@ public class MiscMath } return new int[]{maxIndex, max}; } + + /** + * returns the dot product of two arrays + * @param a ~ array a + * @param b ~ array b + * + * @return + */ + public static double dot(double[] a, double[] b) + { + if (a.length != b.length) + { + throw new IllegalArgumentException(String.format("Vectors do not have the same length (%d, %d)!", a.length, b.length)); + } + + double aibi = 0; + for (int i = 0; i < a.length; i++) + { + aibi += a[i] * b[i]; + } + return aibi; + } + + /** + * returns the euklidian norm of the vector + * @param v ~ vector + * + * @return + */ + public static double norm(double[] v) + { + double result = 0; + for (double i : v) + { + result += Math.pow(i, 2); + } + return Math.sqrt(result); + } + + /** + * returns the number of NaN in the vector + * @param v ~ vector + * + * @return + */ + public static int countNaN(double[] v) + { + int cnt = 0; + for (double i : v) + { + if (Double.isNaN(i)) + { + cnt++; + } + } + return cnt; + } + } diff --git a/src/jalview/math/ccAnalysis.java b/src/jalview/math/ccAnalysis.java deleted file mode 100755 index 22ba162..0000000 --- a/src/jalview/math/ccAnalysis.java +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ - -/* -* Copyright 2018-2022 Kathy Su, Kay Diederichs -* -* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License along with this program. If not, see . -*/ - -/** -* Ported from https://doi.org/10.1107/S2059798317000699 by -* @AUTHOR MorellThomas -*/ - -package jalview.math; - -import jalview.bin.Console; - -import java.lang.Math; -import java.lang.System; -import java.util.Arrays; -import java.util.ArrayList; - -/** - * A class to model rectangular matrices of double values and operations on them - */ -public class ccAnalysis -{ - - /** TODO - * DOCUMENT ME - * - * @param hSigns ~ <++> - * @param scores ~ input score matrix - * - * @return distrustScores - */ - private static int[] initialiseDistrusts(byte[] hSigns, MatrixI scores) - { - int[] distrustScores = new int[scores.width()]; - - // loop over symmetric matrix - for (int i = 0; i < scores.width(); i++) - { - byte hASign = hSigns[i]; - int conHypNum = 0; - int proHypNum = 0; - - for (int j = 0; j < scores.width(); j++) - { - double cell = scores.getRow(i)[j]; // value at [i][j] in scores - byte hBSign = hSigns[j]; - if (!Double.isNaN(cell)) - { - byte cellSign = (byte) Math.signum(cell); - if (cellSign == hASign * hBSign) - { - proHypNum++; - } else { - conHypNum++; - } - } - } - distrustScores[i] = conHypNum - proHypNum; - } - return distrustScores; - } - - /** TODO - * DOCUMENT ME - * - * @param hSigns ~ <++> - * @param distrustScores ~ <++> - * @param scores ~ input score matrix - * - * @return hSigns - */ - private static byte[] optimiseHypothesis(byte[] hSigns, int[] distrustScores, MatrixI scores) - { - // get maximum distrust score - int[] maxes = MiscMath.findMax(distrustScores); - int maxDistrustIndex = maxes[0]; - int maxDistrust = maxes[1]; - - // if hypothesis is not optimal yet - if (maxDistrust > 0) - { - //toggle sign for hI with maximum distrust - hSigns[maxDistrustIndex] *= -1; - // update distrust at same position - distrustScores[maxDistrustIndex] *= -1; - - // also update distrust scores for all hI that were not changed - byte hASign = hSigns[maxDistrustIndex]; - for (int NOTmaxDistrustIndex = 0; NOTmaxDistrustIndex < distrustScores.length; NOTmaxDistrustIndex++) - { - if (NOTmaxDistrustIndex != maxDistrustIndex) - { - byte hBSign = hSigns[NOTmaxDistrustIndex]; - double cell = scores.getRow(maxDistrustIndex)[NOTmaxDistrustIndex]; - - // distrust only changed if not NaN - if (!Double.isNaN(cell)) - { - byte cellSign = (byte) Math.signum(cell); - // if sign of cell matches hypothesis decrease distrust by 2 because 1 more value supporting and 1 less contradicting - // else increase by 2 - if (cellSign == hASign * hBSign) - { - distrustScores[NOTmaxDistrustIndex] -= 2; - } else { - distrustScores[NOTmaxDistrustIndex] += 2; - } - } - } - } - //further optimisation necessary - return optimiseHypothesis(hSigns, distrustScores, scores); - - } else { - return hSigns; - } - } - - /** TODO - * DOCUMENT ME - * - * runs analysis - * - * @param scores ~ score matrix - * - * @return - */ - public static MatrixI run (MatrixI scores) - { - MatrixI eigenMatrix = scores.copy(); - try - { - /* - * Calculate correction factor for 2nd and higher eigenvalue(s). - * This correction is NOT needed for the 1st eigenvalue, because the - * unknown (=NaN) values of the matrix are approximated by presuming - * 1-dimensional vectors as the basis of the matrix interpretation as dot - * products. - */ - MatrixI scoresOld = scores.copy(); - //&! debug - System.out.println("input:"); - eigenMatrix.print(System.out, "%8.2f"); - int matrixWidth = eigenMatrix.width(); // square matrix, so width == height - int matrixElementsTotal = matrixWidth * 2; //total number of elemts - - int correlationFactor = (matrixElementsTotal - eigenMatrix.countNaN()) / matrixElementsTotal; - - /* - * Calculate hypothetical value (1-dimensional vector) h_i for each - * dataset by interpreting the given correlation coefficients as scalar - * products. - */ - - /* - * Memory for current hypothesis concerning sign of each h_i. - * List of signs for all h_i in the encoding: - * * 1: positive - * * 0: zero - * * -1: negative - * Initial hypothesis: all signs are positive. - */ - byte[] hSigns = new byte[matrixWidth]; - Arrays.fill(hSigns, (byte) 1); - - //Estimate signs for each h_i by refining hypothesis on signs. - hSigns = optimiseHypothesis(hSigns, initialiseDistrusts(hSigns, eigenMatrix), eigenMatrix); - - //Estimate absolute values for each h_i by determining sqrt of mean of - //non-NaN absolute values for every row. - //<++> hAbs = //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) - double[] hAbs = MiscMath.sqrt(eigenMatrix.absolute().meanRow()); //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1)) - - //Combine estimated signs with absolute values in obtain total value for - //each h_i. - double[] hValues = MiscMath.elementwiseMultiply(hSigns, hAbs); - //<++>hValues.reshape((1,matrixWidth)); // doesnt it already look like this - - /*Complement symmetric matrix by using the scalar products of estimated - *values of h_i to replace NaN-cells. - *Matrix positions that have estimated values - *(only for diagonal and upper off-diagonal values, due to the symmetry - *the positions of the lower-diagonal values can be inferred). - List of tuples (row_idx, column_idx).*/ - - ArrayList estimatedPositions = new ArrayList(); - - // for off-diagonal cells - for (int rowIndex = 0; rowIndex < matrixWidth - 1; rowIndex++) - { - for (int columnIndex = rowIndex + 1; columnIndex < matrixWidth; columnIndex++) - { - double cell = eigenMatrix.getValue(rowIndex, columnIndex); - if (Double.isNaN(cell)) - { - //calculate scalar product as new cell value - cell = hValues[rowIndex] * hValues[columnIndex]; // something is wrong with hAbs andhValues and everything!!!!!!!!!!!! - //fill in new value in cell and symmetric partner - eigenMatrix.setValue(rowIndex, columnIndex, cell); - eigenMatrix.setValue(columnIndex, rowIndex, cell); - //save positions of estimated values - estimatedPositions.add(new int[]{rowIndex, columnIndex}); - } - } - } - - // for diagonal cells - for (int diagonalIndex = 0; diagonalIndex < matrixWidth; diagonalIndex++) - { - double cell = Math.pow(hValues[diagonalIndex], 2); - eigenMatrix.setValue(diagonalIndex, diagonalIndex, cell); - estimatedPositions.add(new int[]{diagonalIndex, diagonalIndex}); - } - - /*Refine total values of each h_i: - *Initialise h_values of the hypothetical non-existant previous iteration - *with the correct format but with impossible values. - Needed for exit condition of otherwise endless loop.*/ - - double[] hValuesOld = new double[matrixWidth]; - - int iterationCount = 0; - - // repeat unitl values of h do not significantly change anymore - while (true) - { - for (int hIndex = 0; hIndex < matrixWidth; hIndex++) - { - //@python newH = np.sum(hValues * eigenMatrix[hIndex]) / np.sum(hValues ** 2) - double newH = Arrays.stream(MiscMath.elementwiseMultiply(hValues, eigenMatrix.getRow(hIndex))).sum() / Arrays.stream(MiscMath.elementwiseMultiply(hValues, hValues)).sum(); - hValues[hIndex] = newH; - - } - //update values of estimated positions - for (int[] pair : estimatedPositions) // pair ~ row, col - { - double newVal = hValues[pair[0]] * hValues[pair[1]]; - eigenMatrix.setValue(pair[0], pair[1], newVal); - eigenMatrix.setValue(pair[1], pair[0], newVal); - } - - iterationCount++; - - //exit loop as soon as new values are similar to the last iteration - if (MiscMath.allClose(hValues, hValuesOld, 0d, 1e-05d, false)) - { - break; - } - - //save hValues for comparison in the next iteration - System.arraycopy(hValues, 0, hValuesOld, 0, hValues.length); - } - - //----------------------------- - //Use complemented symmetric matrix to calculate final representative - //vectors. - //&! debug - System.out.println("after estimating:"); - eigenMatrix.print(System.out, "%8.2f"); - - //Eigendecomposition. - //eigenMatrix.tqli(); - - //&! debug - //eigenMatrix.print(System.out, "%8.2f"); - - - } catch (Exception q) - { - Console.error("Error computing cc_analysis: " + q.getMessage()); - q.printStackTrace(); - } - return eigenMatrix; - } -} diff --git a/src/jalview/viewmodel/PaSiMapModel.java b/src/jalview/viewmodel/PaSiMapModel.java index c9545a8..1f5e2dc 100644 --- a/src/jalview/viewmodel/PaSiMapModel.java +++ b/src/jalview/viewmodel/PaSiMapModel.java @@ -101,14 +101,19 @@ public class PaSiMapModel ii++; } + int width = pasimap.getWidth(); int height = pasimap.getHeight(); // top = pasimap.getM().height() - 1; - top = height - 1; + //&! + //top = height - 1; + top = height; points = new Vector<>(); - Point[] scores = pasimap.getComponents(top - 1, top - 2, top - 3, 100); + //&! + //Point[] scores = pasimap.getComponents(top - 1, top - 2, top - 3, 100); + Point[] scores = pasimap.getComponents(width - 1, width - 2, width - 3, 1); - for (int i = 0; i < height; i++) + for (int i = 0; i < top; i++) { SequencePoint sp = new SequencePoint(seqs[i], scores[i]); points.add(sp); @@ -158,9 +163,11 @@ public class PaSiMapModel public void updateRcView(int dim1, int dim2, int dim3) { // note: actual indices for components are dim1-1, etc (patch for JAL-1123) - Point[] scores = pasimap.getComponents(dim1 - 1, dim2 - 1, dim3 - 1, 100); + //&! + //Point[] scores = pasimap.getComponents(dim1 - 1, dim2 - 1, dim3 - 1, 100); + Point[] scores = pasimap.getComponents(dim1 - 1, dim2 - 1, dim3 - 1, 1); - for (int i = 0; i < pasimap.getHeight(); i++) + for (int i = 0; i < pasimap.getWidth(); i++) { points.get(i).coord = scores[i]; }