import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import jalview.math.MiscMath;
import jalview.util.Comparison;
import jalview.util.Format;
import jalview.util.MapList;
import java.awt.Color;
import java.awt.Graphics;
import java.io.PrintStream;
+import java.lang.IllegalArgumentException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.StringTokenizer;
{
private static final int MAX_NAME_LENGTH = 30;
- private static final int GAP_OPEN_COST = 120;
+ //&!
+ //private static final int GAP_OPEN_COST = 120;
+ private static final int GAP_OPEN_COST = 10;
- private static final int GAP_EXTEND_COST = 20;
+ //private static final int GAP_EXTEND_COST = 20;
+ private static final float GAP_EXTEND_COST = 0.5f;
private static final int GAP_INDEX = -1;
float[][] score;
+ float alignmentScore;
+
float[][] E;
float[][] F;
public String astr2 = "";
+ public String indelfreeAstr1 = "";
+
+ public String indelfreeAstr2 = "";
+
/** DOCUMENT ME!! */
public int seq1start;
public float maxscore;
+ public float meanScore; //needed for PaSiMap
+
+ public float hypotheticMaxScore; // needed for PaSiMap
+
int prev = 0;
StringBuffer output = new StringBuffer();
}
/**
+ * returns the overall score of the alignment
+ *
+ * @return
+ */
+ public float getAlignmentScore()
+ {
+ return alignmentScore;
+ }
+
+ /**
* DOCUMENT ME!
*
* @return DOCUMENT ME!
/**
* DOCUMENT ME!
*/
+ //&! not / 10
public void traceAlignment()
{
// Find the maximum score along the rhs or bottom row
int i = maxi;
int j = maxj;
int trace;
- maxscore = score[i][j] / 10f;
+ //maxscore = score[i][j] / 10f;
+ maxscore = score[i][j];
+
+ //&! get trailing gaps
+ while ((i < seq1.length - 1) || (j < seq2.length - 1))
+ {
+ i++;
+ j++;
+ }
+ seq1end = i + 1;
+ seq2end = j + 1;
- seq1end = maxi + 1;
- seq2end = maxj + 1;
aseq1 = new int[seq1.length + seq2.length];
aseq2 = new int[seq1.length + seq2.length];
count = (seq1.length + seq2.length) - 1;
+ //&! get trailing gaps
+ while ((i >= seq1.length) || (j >= seq2.length))
+ {
+ if (i >= seq1.length)
+ {
+ aseq1[count] = GAP_INDEX;
+ sb1.append("-");
+ aseq2[count] = seq2[j];
+ sb2.append(s2str.charAt(j));
+ } else if (j >= seq2.length) {
+ aseq1[count] = seq1[i];
+ sb1.append(s1str.charAt(i));
+ aseq2[count] = GAP_INDEX;
+ sb2.append("-");
+ }
+ i--;
+ j--;
+ }
+
+
while (i > 0 && j > 0)
{
aseq1[count] = seq1[i];
sb2.append(s2str.charAt(j));
}
+ //&! get initial gaps
+ while (j > 0 || i > 0)
+ {
+ if (j > 0)
+ {
+ sb1.append("-");
+ sb2.append(s2str.charAt(j));
+ j--;
+ } else if (i > 0) {
+ sb1.append(s1str.charAt(i));
+ sb2.append("-");
+ i--;
+ }
+ }
+
/*
* we built the character strings backwards, so now
* reverse them to convert to sequence strings
*
* @return DOCUMENT ME!
*/
+ //&! not * 10
public int findTrace(int i, int j)
{
int t = 0;
float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
s2str.charAt(j));
- float max = score[i - 1][j - 1] + (pairwiseScore * 10);
+ //float max = score[i - 1][j - 1] + (pairwiseScore * 10);
+ float max = score[i - 1][j - 1] + (pairwiseScore);
if (F[i][j] > max)
{
/**
* DOCUMENT ME!
*/
+ //&! not * 10
public void calcScoreMatrix()
{
int n = seq1.length;
// top left hand element
score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0),
- s2str.charAt(0)) * 10;
+ s2str.charAt(0));
+ //s2str.charAt(0)) * 10;
E[0][0] = -GAP_EXTEND_COST;
F[0][0] = 0;
float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0),
s2str.charAt(j));
- score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST,
+ //score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST,
+ score[0][j] = max(pairwiseScore, -GAP_OPEN_COST,
-GAP_EXTEND_COST);
traceback[0][j] = 1;
float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
s2str.charAt(0));
- score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
+ //score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
+ score[i][0] = max(pairwiseScore, E[i][0], F[i][0]);
traceback[i][0] = -1;
}
float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
s2str.charAt(j));
- score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10),
+ //score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10),
+ score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore),
E[i][j], F[i][j]);
traceback[i][j] = findTrace(i, j);
}
* @param type
* AlignSeq.DNA or AlignSeq.PEP
*/
- //&! 1 v 1 needleman
public static AlignSeq doGlobalNWAlignment(SequenceI s1, SequenceI s2,
String type)
{
}
return redundancy;
}
+
+ /**
+ * calculate the mean score of the alignment
+ * mean score is equal to the score of an alignmenet of two sequences with randomly shuffled AA sequence composited of the same AA as the two original sequences
+ *
+ */
+ public void meanScore()
+ {
+ int length = (indelfreeAstr1.length() > indelfreeAstr2.length()) ? indelfreeAstr1.length() : indelfreeAstr2.length();
+ HashMap<Character, Integer> seq1ResCount = new HashMap<Character, Integer>();
+ HashMap<Character, Integer> seq2ResCount = new HashMap<Character, Integer>();
+
+ for (char residue: s1str.toCharArray())
+ {
+ seq1ResCount.putIfAbsent(residue, 0);
+ seq1ResCount.replace(residue, seq1ResCount.get(residue) + 1);
+ }
+ for (char residue: s2str.toCharArray())
+ {
+ seq2ResCount.putIfAbsent(residue, 0);
+ seq2ResCount.replace(residue, seq2ResCount.get(residue) + 1);
+ }
+
+ float _meanscore = 0;
+ for (char resA : seq1ResCount.keySet())
+ {
+ for (char resB : seq2ResCount.keySet())
+ {
+ int countA = seq1ResCount.get(resA);
+ int countB = seq2ResCount.get(resB);
+
+ float scoreAB = scoreMatrix.getPairwiseScore(resA, resB);
+
+ _meanscore += countA * countB * scoreAB;
+ }
+ }
+ _meanscore /= length;
+ this.meanScore = _meanscore;
+ }
+
+ public float getMeanScore()
+ {
+ return this.meanScore;
+ }
+
+ /**
+ * calculate the hypothetic max score using the self-alignment of the sequences
+ */
+ public void hypotheticMaxScore()
+ {
+ float _hmsA = 0f;
+ float _hmsB = 0f;
+ for (char residue: s1str.toCharArray())
+ {
+ _hmsA += scoreMatrix.getPairwiseScore(residue, residue);
+ }
+ for (char residue: s2str.toCharArray())
+ {
+ _hmsB += scoreMatrix.getPairwiseScore(residue, residue);
+ }
+ this.hypotheticMaxScore = (_hmsA < _hmsB) ? _hmsA : _hmsB;
+ }
+
+ public float getHypotheticMaxScore()
+ {
+ return this.hypotheticMaxScore;
+ }
+
+ /**
+ * create strings based of astr1 and astr2 but without gaps
+ */
+ public void getIndelfreeAstr()
+ {
+ int n = astr1.length(); // both have the same length
+ for (int i = 0; i < n; i++)
+ {
+ if (Character.isLetter(astr1.charAt(i)) && Character.isLetter(astr2.charAt(i)))
+ {
+ this.indelfreeAstr1 += astr1.charAt(i);
+ this.indelfreeAstr2 += astr2.charAt(i);
+ }
+ }
+ }
+
+ /**
+ * calculates the overall score of the alignment
+ * alignmentScore = sum of all scores - all penalties
+ *
+ */
+ public void scoreAlignment() throws RuntimeException
+ {
+ getIndelfreeAstr();
+ meanScore();
+ hypotheticMaxScore();
+ // cannot calculate score because denominator would be zero
+ if (this.hypotheticMaxScore == this.meanScore)
+ {
+ throw new IllegalArgumentException(String.format("hypotheticMaxScore (%8.2f) == meanScore (%8.2f) - division by 0", hypotheticMaxScore, meanScore));
+ }
+ int n = (astr1.length() > astr2.length()) ? astr1.length() : astr2.length();
+
+ System.out.println(String.format("astr1: %c .. %c ; astr2: %c .. %c", astr1.toCharArray()[0], astr1.toCharArray()[astr1.length() - 1], astr2.toCharArray()[0], astr2.toCharArray()[astr2.length() - 1]));
+
+ float score = 0;
+ boolean aGapOpen = false;
+ boolean bGapOpen = false;
+ for (int i = 0; i < n; i++)
+ {
+ char char1 = astr1.charAt(i);
+ char char2 = astr2.charAt(i);
+ boolean aIsLetter = Character.isLetter(char1);
+ boolean bIsLetter = Character.isLetter(char2);
+ if (aIsLetter && bIsLetter) // if pair -> get score
+ {
+ score += scoreMatrix.getPairwiseScore(char1, char2);
+ } else if (!aIsLetter && !bIsLetter) { // both are gap -> skip
+ } else if ((!aIsLetter && aGapOpen) || (!bIsLetter && bGapOpen)) { // one side gapopen -> score - gap_extend
+ score -= GAP_EXTEND_COST;
+ } else { // no gap open -> score - gap_open
+ score -= GAP_OPEN_COST;
+ }
+ aGapOpen = (!aIsLetter) ? true : false;
+ bGapOpen = (!bIsLetter) ? true : false;
+ }
+
+ float preprescore = score;
+ score = (score - this.meanScore) / (this.hypotheticMaxScore - this.meanScore);
+ int[] _max = MiscMath.findMax(new int[]{astr1.replace("-","").length(), astr2.replace("-","").length()}); // {index of max, max}
+ float coverage = (float) indelfreeAstr1.length() / (float) _max[1];
+ float prescore = score;
+ score *= coverage;
+
+ System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %f, mean: %f, max: %f", preprescore, prescore, indelfreeAstr1.length(), score, this.meanScore, this.hypotheticMaxScore));
+ this.alignmentScore = (preprescore < 1) ? Float.NaN : score;
+ }
}
connectivity.putIfAbsent(sequences[j], 0);
int iOld = connectivity.get(sequences[i]);
int jOld = connectivity.get(sequences[j]);
- // count the connection if its score is bigger than 0.1
- if (scores[i][j] >= 0.1) // what is cutoff instead of just 0.1?
+ // count the connection if its score is not NaN
+ if (!Float.isNaN(scores[i][j]))
{
connectivity.put(sequences[i], ++iOld);
connectivity.put(sequences[j], ++jOld);
}
}
}
- //Debug
- for (int i = 0; i < sequences.length; i++)
- {
- System.out.println(connectivity.get(sequences[i]));
- }
// if a sequence has too few connections, abort
connectivity.forEach((sequence, connection) ->
{
+ System.out.println(String.format("%s: %d", sequence.getName(), connection));
if (connection < dim)
{
// a popup saying that it failed would be nice
- throw new ConnectivityException(sequence.getName(), connection, dim);
+ //throw new ConnectivityException(sequence.getName(), connection, dim);
}
} );
import jalview.datamodel.SequenceI;
import jalview.gui.PairwiseAlignPanel;
import jalview.gui.PaSiMapPanel;
+import jalview.math.ccAnalysis;
+import jalview.math.Matrix;
import jalview.math.MatrixI;
import jalview.viewmodel.AlignmentViewport;
// run needleman regardless if aligned or not
// gui.PairwiseAlignPanel <++>
PairwiseAlignPanel alignment = new PairwiseAlignPanel(seqs);
- float[][] scores = alignment.getScores();
- System.out.println(scores[8][18]); // do scores have to be divided by totscore??
+ float[][] scores = alignment.getAlignmentScores(); //bigger index first -- eg scores[14][13]
+ /** TODO scores still not same as in original pasimap */
+ System.out.println(scores[8][0]);
- // what is connectivity and why?
Hashtable<SequenceI, Integer> connectivity = seqs.calculateConnectivity(scores, dim);
+ pairwiseScores = new Matrix(scores);
+ System.out.println(pairwiseScores.getValue(8,0));
+ pairwiseScores.fillDiagonal();
+ System.out.println(pairwiseScores.getValue(8,0));
+ /** TODO implement cc_analysis */
+ pairwiseScores = ccAnalysis.run(pairwiseScores);
- /** pca code */
- /*
- * sequence pairwise similarity scores
- */
- //&! analysis/scoremodels/ScoreMatrix computeSimilarity
- //pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams);
- /*
- * tridiagonal matrix
- */
- //tridiagonal = pairwiseScores.copy();
- //tridiagonal.tred();
+ /** code like in pca, needed for plot */
+ tridiagonal = pairwiseScores.copy();
+ tridiagonal.tred();
+ eigenMatrix = tridiagonal.copy();
+ eigenMatrix.tqli();
+ System.out.println(getDetails());
+
- /*
- * the diagonalization matrix
- */
- //eigenMatrix = tridiagonal.copy();
- //eigenMatrix.tqli();
} catch (Exception q)
{
Console.error("Error computing PaSiMap: " + q.getMessage());
private float[][] scores;
+ private float[][] alignmentScores; // scores used by PaSiMap
+
AlignmentViewport av;
Vector<SequenceI> sequences;
: AlignSeq.PEP;
float[][] scores = new float[seqs.length][seqs.length];
+ float[][] alignmentScores = new float[seqs.length][seqs.length];
double totscore = 0D;
int count = seqs.length;
boolean first = true;
for (int i = 1; i < count; i++)
{
+ // fill diagonal alignmentScores with Float.NaN
+ alignmentScores[i-1][i-1] = Float.NaN;
for (int j = 0; j < i; j++)
{
AlignSeq as = new AlignSeq(seqs[i], seqStrings[i], seqs[j],
as.calcScoreMatrix();
as.traceAlignment();
+ as.scoreAlignment();
if (!first)
{
first = false;
as.printAlignment(System.out);
scores[i][j] = as.getMaxScore() / as.getASeq1().length;
+ alignmentScores[i][j] = as.getAlignmentScore();
+ //&!
totscore = totscore + scores[i][j];
textarea.append(as.getOutput());
sequences.add(as.getAlignedSeq2());
}
}
+ alignmentScores[count-1][count-1] = Float.NaN;
this.scores = scores;
+ this.alignmentScores = alignmentScores;
if (count > 2)
{
return this.scores;
}
+ public float[][] getAlignmentScores()
+ {
+ return this.alignmentScores;
+ }
+
/**
* Prints a matrix of seqi-seqj pairwise alignment scores to sysout
*
import jalview.util.MessageManager;
import java.io.PrintStream;
+import java.lang.Math;
import java.util.Arrays;
/**
}
}
+ public Matrix(float[][] values)
+ {
+ this.rows = values.length;
+ this.cols = this.rows == 0 ? 0 : values[0].length;
+
+ /*
+ * make a copy of the values array, for immutability
+ */
+ this.value = new double[rows][];
+ int i = 0;
+ for (float[] row : values)
+ {
+ if (row != null)
+ {
+ value[i] = new double[row.length];
+ int j = 0;
+ for (float oldValue : row)
+ {
+ value[i][j] = oldValue;
+ j++;
+ }
+ }
+ i++;
+ }
+ }
+
@Override
public MatrixI transpose()
{
}
return true;
}
+
+ /**
+ * Returns a copy in which every value in the matrix is its absolute
+ *
+ */
+ @Override
+ public MatrixI absolute()
+ {
+ MatrixI copy = this.copy();
+ for (int i = 0; i < copy.width(); i++)
+ {
+ double[] row = copy.getRow(i);
+ if (row != null)
+ {
+ for (int j = 0; j < row.length; j++)
+ {
+ row[j] = Math.abs(row[j]);
+ }
+ }
+ }
+ return copy;
+ }
+
+ /**
+ * Returns the mean of each row
+ *
+ */
+ @Override
+ public double[] meanRow()
+ {
+ double[] mean = new double[rows];
+ int i = 0;
+ for (double[] row : value)
+ {
+ if (row != null)
+ {
+ mean[i++] = MiscMath.mean(row);
+ }
+ }
+ return mean;
+ }
+
+ /**
+ * Returns the mean of each column
+ *
+ */
+ @Override
+ public double[] meanCol()
+ {
+ double[] mean = new double[cols];
+ for (int j = 0; j < cols; j++)
+ {
+ double[] column = getColumn(j);
+ if (column != null)
+ {
+ mean[j] = MiscMath.mean(column);
+ }
+ }
+ return mean;
+ }
+
+ /**
+ * fills up a diagonal matrix with its transposed copy
+ * !other side should be filled with 0
+ *
+ * TODO check on which side it was diagonal and only do calculations for the other side
+ */
+ @Override
+ public void fillDiagonal()
+ {
+ int n = this.rows;
+ int m = this.cols;
+ MatrixI copy = this.transpose();
+ for (int i = 0; i < n; i++)
+ {
+ for (int j = 0; j < m; j++)
+ {
+ if (i != j)
+ {
+ this.addValue(i, j, copy.getValue(i,j));
+ }
+ }
+ }
+ }
+
+ /**
+ * counts the number of Double.NaN in the matrix
+ *
+ * @return
+ */
+ @Override
+ public int countNaN()
+ {
+ int NaN = 0;
+ for (int i = 0; i < this.rows; i++)
+ {
+ for (int j = 0; j < this.cols; j++)
+ {
+ if (Double.isNaN(this.getValue(i,j)))
+ {
+ NaN++;
+ }
+ }
+ }
+ return NaN;
+ }
+
}
* @return
*/
boolean equals(MatrixI m2, double delta);
+
+ /**
+ * Returns a copy in which every value in the matrix is its absolute
+ */
+ MatrixI absolute();
+
+ /**
+ * Returns the mean of each row
+ */
+ double[] meanRow();
+
+ /**
+ * Returns the mean of each column
+ */
+ double[] meanCol();
+
+ /**
+ * fills up a diagonal matrix with its transposed copy
+ * !other side should be filled with either 0 or Double.NaN
+ */
+ void fillDiagonal();
+
+ /**
+ * counts the number of Double.NaN in the matrix
+ *
+ * @return
+ */
+ int countNaN();
}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.math;
+
+import java.lang.Math;
+import java.util.Arrays;
+
+/**
+ * A collection of miscellaneous mathematical operations
+ * @AUTHOR MorellThomas
+ */
+public class MiscMath
+{
+ /**
+ * calculates the mean of an array
+ *
+ * @param m ~ array
+ * @return
+ * TODO whats with nans??
+ */
+ public static double mean(double[] m)
+ {
+ double sum = 0;
+ for (int i = 0; i < m.length; i++)
+ {
+ if (!Double.isNaN(m[i]))
+ {
+ sum += m[i];
+ }
+ }
+ return sum / m.length;
+ }
+
+ /**
+ * calculates the square root of each element in an array
+ *
+ * @param m ~ array
+ *
+ * @return
+ * TODO
+ * make general with function passed -> apply function to each element
+ */
+ public static double[] sqrt(double[] m)
+ {
+ double[] sqrts = new double[m.length];
+ for (int i = 0; i < m.length; i++)
+ {
+ sqrts[i] = Math.sqrt(m[i]);
+ }
+ return sqrts;
+ }
+
+ /**
+ * calculate element wise multiplication of two arrays
+ *
+ * @param a ~ array
+ * @param b ~ array
+ *
+ * @return
+ */
+ public static double[] elementwiseMultiply(byte[] a, double[] b) throws RuntimeException
+ {
+ if (a.length != b.length)
+ {
+ throw new SameLengthException(a.length, b.length);
+ }
+ double[] result = new double[a.length];
+ for (int i = 0; i < a.length; i++)
+ {
+ result[i] = a[i] * b[i];
+ }
+ return result;
+ }
+ public static double[] elementwiseMultiply(double[] a, double[] b) throws RuntimeException
+ {
+ if (a.length != b.length)
+ {
+ throw new SameLengthException(a.length, b.length);
+ }
+ double[] result = new double[a.length];
+ for (int i = 0; i < a.length; i++)
+ {
+ result[i] = a[i] * b[i];
+ }
+ return result;
+ }
+ public static byte[] elementwiseMultiply(byte[] a, byte[] b) throws RuntimeException
+ {
+ if (a.length != b.length)
+ {
+ throw new SameLengthException(a.length, b.length);
+ }
+ byte[] result = new byte[a.length];
+ for (int i = 0; i < a.length; i++)
+ {
+ result[i] = (byte) (a[i] * b[i]);
+ }
+ return result;
+ }
+
+ /**
+ * calculate element wise division of two arrays
+ *
+ * @param a ~ array
+ * @param b ~ array
+ *
+ * @return
+ */
+ public static double[] elementwiseDivide(double[] a, double[] b) throws RuntimeException
+ {
+ if (a.length != b.length)
+ {
+ throw new SameLengthException(a.length, b.length);
+ }
+ double[] result = new double[a.length];
+ for (int i = 0; i < a.length; i++)
+ {
+ result[i] = a[i] / b[i];
+ }
+ return result;
+ }
+
+ /**
+ * returns true if two arrays are element wise within a tolerance
+ *
+ * @param a ~ array
+ * @param b ~ array
+ * @param rtol ~ relative tolerance
+ * @param atol ~ absolute tolerance
+ * @param equalNAN ~ whether NaN at the same position return true
+ *
+ * @return
+ */
+ public static boolean allClose(double[] a, double[] b, double rtol, double atol, boolean equalNAN)
+ {
+ boolean areEqual = true;
+ for (int i = 0; i < a.length; i++)
+ {
+ if (equalNAN && (Double.isNaN(a[i]) && Double.isNaN(b[i])))
+ {
+ continue;
+ }
+ if (Math.abs(a[i] - b[i]) > (atol + rtol * Math.abs(b[i])))
+ {
+ areEqual = false;
+ break;
+ }
+ }
+ return areEqual;
+ }
+
+ /**
+ * returns the index of the maximum and the maximum value of an array
+ *
+ * @pararm a ~ array
+ *
+ * @returns
+ */
+ public static int[] findMax(int[] a)
+ {
+ int max = 0;
+ int maxIndex = 0;
+ for (int i = 0; i < a.length; i++)
+ {
+ if (a[i] > max)
+ {
+ max = a[i];
+ maxIndex = i;
+ }
+ }
+ return new int[]{maxIndex, max};
+ }
+}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.math;
+
+public class SameLengthException extends RuntimeException
+{
+ public SameLengthException(int lengthA, int lengthB)
+ {
+ this("Your arrays do not have the same length!", lengthA, lengthB);
+ }
+
+ public SameLengthException(String message, int lengthA, int lengthB)
+ {
+ super(String.format("%s (%d and %d)", message, lengthA, lengthB));
+ }
+
+}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+
+/*
+* Copyright 2018-2022 Kathy Su, Kay Diederichs
+*
+* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+/**
+* Ported from https://doi.org/10.1107/S2059798317000699 by
+* @AUTHOR MorellThomas
+*/
+
+package jalview.math;
+
+import jalview.bin.Console;
+
+import java.lang.Math;
+import java.lang.System;
+import java.util.Arrays;
+import java.util.ArrayList;
+
+/**
+ * A class to model rectangular matrices of double values and operations on them
+ */
+public class ccAnalysis
+{
+
+ /** TODO
+ * DOCUMENT ME
+ *
+ * @param hSigns ~ <++>
+ * @param scores ~ input score matrix
+ *
+ * @return distrustScores
+ */
+ private static int[] initialiseDistrusts(byte[] hSigns, MatrixI scores)
+ {
+ int[] distrustScores = new int[scores.width()];
+
+ // loop over symmetric matrix
+ for (int i = 0; i < scores.width(); i++)
+ {
+ byte hASign = hSigns[i];
+ int conHypNum = 0;
+ int proHypNum = 0;
+
+ for (int j = 0; j < scores.width(); j++)
+ {
+ double cell = scores.getRow(i)[j]; // value at [i][j] in scores
+ byte hBSign = hSigns[j];
+ if (!Double.isNaN(cell))
+ {
+ byte cellSign = (byte) Math.signum(cell);
+ if (cellSign == hASign * hBSign)
+ {
+ proHypNum++;
+ } else {
+ conHypNum++;
+ }
+ }
+ }
+ distrustScores[i] = conHypNum - proHypNum;
+ }
+ return distrustScores;
+ }
+
+ /** TODO
+ * DOCUMENT ME
+ *
+ * @param hSigns ~ <++>
+ * @param distrustScores ~ <++>
+ * @param scores ~ input score matrix
+ *
+ * @return hSigns
+ */
+ private static byte[] optimiseHypothesis(byte[] hSigns, int[] distrustScores, MatrixI scores)
+ {
+ // get maximum distrust score
+ int[] maxes = MiscMath.findMax(distrustScores);
+ int maxDistrustIndex = maxes[0];
+ int maxDistrust = maxes[1];
+
+ // if hypothesis is not optimal yet
+ if (maxDistrust > 0)
+ {
+ //toggle sign for hI with maximum distrust
+ hSigns[maxDistrustIndex] *= -1;
+ // update distrust at same position
+ distrustScores[maxDistrustIndex] *= -1;
+
+ // also update distrust scores for all hI that were not changed
+ byte hASign = hSigns[maxDistrustIndex];
+ for (int NOTmaxDistrustIndex = 0; NOTmaxDistrustIndex < distrustScores.length; NOTmaxDistrustIndex++)
+ {
+ if (NOTmaxDistrustIndex != maxDistrustIndex)
+ {
+ byte hBSign = hSigns[NOTmaxDistrustIndex];
+ double cell = scores.getRow(maxDistrustIndex)[NOTmaxDistrustIndex];
+
+ // distrust only changed if not NaN
+ if (!Double.isNaN(cell))
+ {
+ byte cellSign = (byte) Math.signum(cell);
+ // if sign of cell matches hypothesis decrease distrust by 2 because 1 more value supporting and 1 less contradicting
+ // else increase by 2
+ if (cellSign == hASign * hBSign)
+ {
+ distrustScores[NOTmaxDistrustIndex] -= 2;
+ } else {
+ distrustScores[NOTmaxDistrustIndex] += 2;
+ }
+ }
+ }
+ }
+ //further optimisation necessary
+ return optimiseHypothesis(hSigns, distrustScores, scores);
+
+ } else {
+ return hSigns;
+ }
+ }
+
+ /** TODO
+ * DOCUMENT ME
+ *
+ * runs analysis
+ *
+ * @param scores ~ score matrix
+ *
+ * @return
+ */
+ public static MatrixI run (MatrixI scores)
+ {
+ MatrixI eigenMatrix = scores.copy();
+ try
+ {
+ /*
+ * Calculate correction factor for 2nd and higher eigenvalue(s).
+ * This correction is NOT needed for the 1st eigenvalue, because the
+ * unknown (=NaN) values of the matrix are approximated by presuming
+ * 1-dimensional vectors as the basis of the matrix interpretation as dot
+ * products.
+ */
+ MatrixI scoresOld = scores.copy();
+ //&! debug
+ System.out.println("input:");
+ eigenMatrix.print(System.out, "%8.2f");
+ int matrixWidth = eigenMatrix.width(); // square matrix, so width == height
+ int matrixElementsTotal = matrixWidth * 2; //total number of elemts
+
+ int correlationFactor = (matrixElementsTotal - eigenMatrix.countNaN()) / matrixElementsTotal;
+
+ /*
+ * Calculate hypothetical value (1-dimensional vector) h_i for each
+ * dataset by interpreting the given correlation coefficients as scalar
+ * products.
+ */
+
+ /*
+ * Memory for current hypothesis concerning sign of each h_i.
+ * List of signs for all h_i in the encoding:
+ * * 1: positive
+ * * 0: zero
+ * * -1: negative
+ * Initial hypothesis: all signs are positive.
+ */
+ byte[] hSigns = new byte[matrixWidth];
+ Arrays.fill(hSigns, (byte) 1);
+
+ //Estimate signs for each h_i by refining hypothesis on signs.
+ hSigns = optimiseHypothesis(hSigns, initialiseDistrusts(hSigns, eigenMatrix), eigenMatrix);
+
+ //Estimate absolute values for each h_i by determining sqrt of mean of
+ //non-NaN absolute values for every row.
+ //<++> hAbs = //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1))
+ double[] hAbs = MiscMath.sqrt(eigenMatrix.absolute().meanRow()); //np.sqrt(np.nanmean(np.absolute(eigenMatrix), axis=1))
+
+ //Combine estimated signs with absolute values in obtain total value for
+ //each h_i.
+ double[] hValues = MiscMath.elementwiseMultiply(hSigns, hAbs);
+ //<++>hValues.reshape((1,matrixWidth)); // doesnt it already look like this
+
+ /*Complement symmetric matrix by using the scalar products of estimated
+ *values of h_i to replace NaN-cells.
+ *Matrix positions that have estimated values
+ *(only for diagonal and upper off-diagonal values, due to the symmetry
+ *the positions of the lower-diagonal values can be inferred).
+ List of tuples (row_idx, column_idx).*/
+
+ ArrayList<int[]> estimatedPositions = new ArrayList<int[]>();
+
+ // for off-diagonal cells
+ for (int rowIndex = 0; rowIndex < matrixWidth - 1; rowIndex++)
+ {
+ for (int columnIndex = rowIndex + 1; columnIndex < matrixWidth; columnIndex++)
+ {
+ double cell = eigenMatrix.getValue(rowIndex, columnIndex);
+ if (Double.isNaN(cell))
+ {
+ //calculate scalar product as new cell value
+ cell = hValues[rowIndex] * hValues[columnIndex]; // something is wrong with hAbs andhValues and everything!!!!!!!!!!!!
+ //fill in new value in cell and symmetric partner
+ eigenMatrix.setValue(rowIndex, columnIndex, cell);
+ eigenMatrix.setValue(columnIndex, rowIndex, cell);
+ //save positions of estimated values
+ estimatedPositions.add(new int[]{rowIndex, columnIndex});
+ }
+ }
+ }
+
+ // for diagonal cells
+ for (int diagonalIndex = 0; diagonalIndex < matrixWidth; diagonalIndex++)
+ {
+ double cell = Math.pow(hValues[diagonalIndex], 2);
+ eigenMatrix.setValue(diagonalIndex, diagonalIndex, cell);
+ estimatedPositions.add(new int[]{diagonalIndex, diagonalIndex});
+ }
+
+ /*Refine total values of each h_i:
+ *Initialise h_values of the hypothetical non-existant previous iteration
+ *with the correct format but with impossible values.
+ Needed for exit condition of otherwise endless loop.*/
+
+ double[] hValuesOld = new double[matrixWidth];
+
+ int iterationCount = 0;
+
+ // repeat unitl values of h do not significantly change anymore
+ while (true)
+ {
+ for (int hIndex = 0; hIndex < matrixWidth; hIndex++)
+ {
+ //@python newH = np.sum(hValues * eigenMatrix[hIndex]) / np.sum(hValues ** 2)
+ double newH = Arrays.stream(MiscMath.elementwiseMultiply(hValues, eigenMatrix.getRow(hIndex))).sum() / Arrays.stream(MiscMath.elementwiseMultiply(hValues, hValues)).sum();
+ hValues[hIndex] = newH;
+
+ }
+ //update values of estimated positions
+ for (int[] pair : estimatedPositions) // pair ~ row, col
+ {
+ double newVal = hValues[pair[0]] * hValues[pair[1]];
+ eigenMatrix.setValue(pair[0], pair[1], newVal);
+ eigenMatrix.setValue(pair[1], pair[0], newVal);
+ }
+
+ iterationCount++;
+
+ //exit loop as soon as new values are similar to the last iteration
+ if (MiscMath.allClose(hValues, hValuesOld, 0d, 1e-05d, false))
+ {
+ break;
+ }
+
+ //save hValues for comparison in the next iteration
+ System.arraycopy(hValues, 0, hValuesOld, 0, hValues.length);
+ }
+
+ //-----------------------------
+ //Use complemented symmetric matrix to calculate final representative
+ //vectors.
+ //&! debug
+ System.out.println("after estimating:");
+ eigenMatrix.print(System.out, "%8.2f");
+
+ //Eigendecomposition.
+ //eigenMatrix.tqli();
+
+ //&! debug
+ //eigenMatrix.print(System.out, "%8.2f");
+
+
+ } catch (Exception q)
+ {
+ Console.error("Error computing cc_analysis: " + q.getMessage());
+ q.printStackTrace();
+ }
+ return eigenMatrix;
+ }
+}