2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.analysis.ScoreModelI;
24 import jalview.api.analysis.SimilarityParamsI;
25 import jalview.bin.Console;
26 import jalview.datamodel.AlignmentView;
27 import jalview.datamodel.Point;
28 import jalview.datamodel.SequenceI;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.gui.PairwiseAlignPanel;
31 import jalview.gui.PaSiMapPanel;
32 import jalview.math.Matrix;
33 import jalview.math.MatrixI;
34 import jalview.viewmodel.AlignmentViewport;
37 import java.io.PrintStream;
38 import java.util.Hashtable;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.Enumeration;
44 * Performs Principal Component Analysis on given sequences
45 * @AUTHOR MorellThomas
47 public class PaSiMap implements Runnable
52 final private AlignmentViewport seqs;
54 final private ScoreModelI scoreModel;
56 final private SimilarityParamsI similarityParams;
58 final private byte dim = 3;
63 private PairwiseAlignPanel alignment;
65 private MatrixI pairwiseScores;
67 private MatrixI eigenMatrix;
70 * Constructor given the sequences to compute for, the similarity model to
71 * use, and a set of parameters for sequence comparison
77 public PaSiMap(AlignmentViewport sequences, ScoreModelI sm,
78 SimilarityParamsI options)
80 this.seqs = sequences;
82 this.similarityParams = options;
89 * Index of diagonal within matrix
91 * @return Returns value of diagonal from matrix
93 public double getEigenvalue(int i)
95 return eigenMatrix.getD()[i];
99 * Returns coordinates for each datapoint
107 * @param factor ~ is 1
109 * @return DOCUMENT ME!
111 public Point[] getComponents(int l, int n, int mm, float factor)
113 Point[] out = new Point[getHeight()];
115 for (int i = 0; i < out.length; i++)
117 float x = (float) component(i, l) * factor;
118 float y = (float) component(i, n) * factor;
119 float z = (float) component(i, mm) * factor;
120 out[i] = new Point(x, y, z);
132 * @return DOCUMENT ME!
134 public double[] component(int n)
136 // n = index of eigenvector
137 double[] out = new double[getWidth()];
139 for (int i = 0; i < out.length; i++)
141 out[i] = component(i, n);
155 * @return DOCUMENT ME!
157 double component(int row, int n)
159 return eigenMatrix.getValue(row, n);
163 * Answers a formatted text report of the PaSiMap calculation results (matrices
164 * and eigenvalues) suitable for display
168 public String getDetails()
170 StringBuilder sb = new StringBuilder(1024);
171 sb.append("PaSiMap calculation using ").append(scoreModel.getName())
172 .append(" sequence similarity matrix\n========\n\n");
173 PrintStream ps = wrapOutputBuffer(sb);
176 * coordinates matrix, with D vector
178 sb.append(" --- Pairwise correlation coefficients ---\n");
179 pairwiseScores.print(ps, "%8.6f ");
182 sb.append(" --- Eigenvalues ---\n");
183 eigenMatrix.printD(ps, "%15.4e");
186 sb.append(" --- Coordinates ---\n");
187 eigenMatrix.print(ps, "%8.6f ");
190 return sb.toString();
194 * Performs the PaSiMap calculation
196 * creates a new gui/PairwiseAlignPanel with the input sequences (AlignmentViewport)
197 * uses analysis/AlignSeq to creatue the pairwise alignments and calculate the AlignmentScores (float for each pair)
198 * gets all float[][] scores from the gui/PairwiseAlignPanel
199 * checks the connections for each sequence with AlignmentViewport seqs.calculateConnectivity(float[][] scores, int dim) (from analysis/Connectivity) -- throws an Exception if insufficient
200 * creates a math/MatrixI pairwiseScores of the float[][] scores
201 * copys the scores and fills the diagonal to create a symmetric matrix using math/Matrix.fillDiagonal()
202 * performs the analysis/ccAnalysis with the symmetric matrix
203 * gets the eigenmatrix and the eigenvalues using math/Matrix.tqli()
210 for (SequenceI see : seqs.getAlignment().getSequencesArray())
212 System.out.println(see.getName());
215 int nSeqs = seqs.getAlignment().getHeight();
216 float[][] scores = new float[nSeqs][nSeqs]; // rows, cols
219 //while ((nSeqs / nSplits) > 300) // heap full at 341
220 while (((float) nSeqs / nSplits) > 5f) // heap full at 341
222 int splitSeqs = (int) Math.ceil((float) nSeqs / nSplits);
223 System.out.println(String.format("%d -> %d splits into %d seqs", nSeqs, nSplits, splitSeqs));
225 int[] splitIndices = new int[nSplits];
226 for (int i = 0; i < nSplits; i++)
228 splitIndices[i] = splitSeqs * (i + 1); //exclusive!!
231 HashMap<int[], Float> valuesForScores = splitCombineAndAlign(seqs.getAlignment().getSequencesArray(), splitIndices, splitSeqs);
233 for (int[] coords : valuesForScores.keySet())
235 scores[coords[0]][coords[1]] = valuesForScores.get(coords);
237 pairwiseScores = new Matrix(scores);
238 pairwiseScores.print(System.out, "%1.4f ");
241 alignment = new PairwiseAlignPanel(seqs, true, 100, 5);
242 float[][] scores = alignment.getAlignmentScores(); //bigger index first -- eg scores[14][13]
244 //Hashtable<SequenceI, Integer> connectivity = seqs.calculateConnectivity(scores, dim);
246 pairwiseScores = new Matrix(scores);
247 pairwiseScores.print(System.out, "%1.4f ");
249 pairwiseScores.fillDiagonal();
251 eigenMatrix = pairwiseScores.copy();
253 ccAnalysis cc = new ccAnalysis(pairwiseScores, dim);
254 eigenMatrix = cc.run();
257 } catch (Exception q)
259 Console.error("Error computing PaSiMap: " + q.getMessage());
265 * aligns sequences in splits
266 * Splits each split into halves and aligns them agains halves of other splits
269 * @param i ~ indices of split
270 * @param s ~ sequences per split
272 * @return a map of where to put in scores, value ~ scores[n][m] = v
274 protected HashMap<int[], Float> splitCombineAndAlign(SequenceI[] seqArray, int[] i, int s)
276 HashMap<int[], Float> result = new HashMap<int[], Float>();
278 int[][] allGroups = new int[i.length][s];
279 for (int g = 0; g < i.length; g++) // group g
281 int e = 0; // index going through allGroups[][e]
282 for (int j = g * s; j < i[g]; j++) // goes through all numbers in one group
284 allGroups[g][e++] = j >= seqArray.length ? -1 : j;
288 int g = 0; // group count
289 for (int[] group : allGroups)
291 HashSet<SequenceI> sg = new HashSet<SequenceI>();
292 //SequenceGroup sg = new SequenceGroup();
293 for (int index : group)
297 //sg.addSequence(seqArray[index], false);
298 sg.add(seqArray[index]);
300 SequenceI[] sgArray = new SequenceI[sg.size()];
302 for (SequenceI seq : sg)
306 //seqs.setSelectionGroup(sg);
307 //PairwiseAlignPanel pap = new PairwiseAlignPanel(seqs, true, 100, 5);
308 //float[][] scores = pap.getAlignmentScores(); //bigger index first -- eg scores[14][13]
309 float[][] scores = simulateAlignment(sgArray);
310 for (int s1 = 0; s1 < scores.length; s1++) // row
312 result.put(new int[]{s1 + g * s, s1 + g * s}, Float.NaN); // self score = Float.NaN
313 for (int s2 = 0; s2 < s1; s2++) // col
315 result.put(new int[]{s1 + g * s, s2 + g * s}, scores[s1][s2]);
321 int smallS = (int) Math.ceil((float) s/2);
322 int[][] newGroups = new int[i.length * 2][smallS];
325 for (int[] group : allGroups)
327 int[] split1 = new int[smallS];
328 int[] split2 = new int[smallS];
329 for (int k = 0; k < group.length; k++)
332 split1[k] = group[k];
334 split2[k - smallS] = group[k];
336 newGroups[g++] = split1;
337 newGroups[g++] = split2;
340 // align each subsplit with subsplits from other split groups
341 for (int subsplitN = 0; subsplitN < newGroups.length; subsplitN++)
343 int c = 1; // current split block
344 while (newGroups[subsplitN][0] > smallS * c)
348 for (int nextSplit = subsplitN + 1; nextSplit < newGroups.length; nextSplit++)
350 if (newGroups[nextSplit][0] >= s * c) // if next subsplit of next split group -> align seqs
352 HashSet<SequenceI> sg = new HashSet<SequenceI>();
353 //SequenceGroup sg = new SequenceGroup();
354 for (int index : newGroups[subsplitN])
358 //sg.addSequence(seqArray[index], false);
359 sg.add(seqArray[index]);
361 for (int index : newGroups[nextSplit])
365 //sg.addSequence(seqArray[index], false);
366 sg.add(seqArray[index]);
368 SequenceI[] sgArray = new SequenceI[sg.size()];
370 for (SequenceI seq : sg)
374 //seqs.setSelectionGroup(sg);
375 //PairwiseAlignPanel pap = new PairwiseAlignPanel(seqs, true, 100, 5);
376 //float[][] scores = pap.getAlignmentScores(); //bigger index first -- eg scores[14][13]
377 float[][] scores = simulateAlignment(sgArray);
378 for (int s1 = 0; s1 < scores.length; s1++) // row
380 for (int s2 = 0; s2 < s1; s2++) // col
382 if (s1 >= smallS && s2 < smallS)
383 result.put(new int[]{s1 + (nextSplit-1) * smallS, s2 + subsplitN * smallS}, scores[s1][s2]);
394 * simulate the alignment of a PairwiseAlignPanel
397 * @return alignment scores
399 protected float[][] simulateAlignment(SequenceI[] seqs)
401 float[][] result = new float[seqs.length][seqs.length];
402 for (int i = 1; i < seqs.length; i++)
404 for (int j = 0; j < i; j++)
406 String[] seqStrings = new String[2];
407 seqStrings[0] = seqs[i].getSequenceAsString();
408 seqStrings[1] = seqs[j].getSequenceAsString();
410 AlignSeq as = new AlignSeq(seqs[i], seqStrings[0], seqs[j], seqStrings[1], AlignSeq.PEP);
411 as.calcScoreMatrix();
412 as.traceAlignmentWithEndGaps();
414 as.printAlignment(System.out);
415 result[i][j] = as.getAlignmentScore();
422 * Returns a PrintStream that wraps (appends its output to) the given
428 protected PrintStream wrapOutputBuffer(StringBuilder sb)
430 PrintStream ps = new PrintStream(System.out)
433 public void print(String x)
439 public void println()
448 * Answers the N dimensions of the NxM PaSiMap matrix. This is the number of
449 * sequences involved in the pairwise score calculation.
453 public int getHeight()
455 // TODO can any of seqs[] be null?
456 return eigenMatrix.height();// seqs.getSequences().length;
460 * Answers the M dimensions of the NxM PaSiMap matrix. This is the number of
461 * sequences involved in the pairwise score calculation.
465 public int getWidth()
467 // TODO can any of seqs[] be null?
468 return eigenMatrix.width();// seqs.getSequences().length;
472 * Answers the sequence pairwise similarity scores which were the first step
473 * of the PaSiMap calculation
477 public MatrixI getPairwiseScores()
479 return pairwiseScores;
482 public void setPairwiseScores(MatrixI m)
487 public MatrixI getEigenmatrix()
492 public void setEigenmatrix(MatrixI m)
497 public PairwiseAlignPanel getAlignments()
502 public String getAlignmentOutput()
504 return alignment.getAlignmentOutput();