From fd98f37342ae53c649a83ca11fffc9c15b9bd429 Mon Sep 17 00:00:00 2001 From: MorellThomas Date: Wed, 6 Mar 2024 14:55:18 +0100 Subject: [PATCH] Test version to switch branches --- src/jalview/analysis/AlignSeq.java | 4 +- src/jalview/analysis/Connectivity.java | 8 +- src/jalview/analysis/PaSiMap.java | 196 ++++++++++++++++++++++++++++++- src/jalview/gui/PairwiseAlignPanel.java | 2 + 4 files changed, 206 insertions(+), 4 deletions(-) diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index d394024..1b5dab4 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -1456,7 +1456,7 @@ public class AlignSeq score *= coverage; //System.out.println(String.format("prepre-score: %f, pre-score: %f, longlength: %d\nscore: %1.16f, mean: %f, max: %d", preprescore, prescore, _max[1], score, this.meanScore, this.hypotheticMaxScore)); - float minScore = 1f; - this.alignmentScore = (preprescore < minScore) ? Float.NaN : score; + float minScore = 0f; + this.alignmentScore = (score <= minScore) ? 
Float.NaN : score; } } diff --git a/src/jalview/analysis/Connectivity.java b/src/jalview/analysis/Connectivity.java index 0f849e3..0df1145 100644 --- a/src/jalview/analysis/Connectivity.java +++ b/src/jalview/analysis/Connectivity.java @@ -23,9 +23,14 @@ package jalview.analysis; //import jalview.datamodel.AlignmentView; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; +import jalview.gui.Desktop; +import jalview.gui.JvOptionPane; import jalview.viewmodel.AlignmentViewport; +import java.util.Comparator; import java.util.Hashtable; +import java.util.HashSet; +import java.util.TreeSet; /** * @Author MorellThomas @@ -58,6 +63,7 @@ public class Connectivity int iOld = connectivity.get(sequences[i]); int jOld = connectivity.get(sequences[j]); // count the connection if its score is not NaN +//System.out.println(String.format("%s - %s : %f", sequences[i].getName(), sequences[j].getName(), scores[i][j])); if (!Float.isNaN(scores[i][j])) { connectivity.put(sequences[i], ++iOld); @@ -72,7 +78,7 @@ public class Connectivity System.out.println(String.format("%s: %d", sequence.getName(), connection)); if (connection < dim) { - // a popup saying that it failed would be nice + JvOptionPane.showInternalMessageDialog(Desktop.desktop, String.format("Insufficient number of connections for %s (%d, should be %d or more)", sequence.getName(), connection, dim), "Connectivity Error", JvOptionPane.WARNING_MESSAGE); throw new ConnectivityException(sequence.getName(), connection, dim); } } ); diff --git a/src/jalview/analysis/PaSiMap.java b/src/jalview/analysis/PaSiMap.java index 89bfff0..81d44f5 100755 --- a/src/jalview/analysis/PaSiMap.java +++ b/src/jalview/analysis/PaSiMap.java @@ -26,6 +26,7 @@ import jalview.bin.Console; import jalview.datamodel.AlignmentView; import jalview.datamodel.Point; import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceGroup; import jalview.gui.PairwiseAlignPanel; import jalview.gui.PaSiMapPanel; import 
jalview.math.Matrix; @@ -35,6 +36,8 @@ import jalview.viewmodel.AlignmentViewport; import java.io.PrintStream; import java.util.Hashtable; +import java.util.HashMap; +import java.util.HashSet; import java.util.Enumeration; /** @@ -204,18 +207,52 @@ public class PaSiMap implements Runnable { try { +for (SequenceI see : seqs.getAlignment().getSequencesArray()) +{ +System.out.println(see.getName()); +} + + int nSeqs = seqs.getAlignment().getHeight(); + float[][] scores = new float[nSeqs][nSeqs]; // rows, cols + + int nSplits = 1; + //while ((nSeqs / nSplits) > 300) // heap full at 341 + while (((float) nSeqs / nSplits) > 5f) // heap full at 341 + nSplits++; + int splitSeqs = (int) Math.ceil((float) nSeqs / nSplits); +System.out.println(String.format("%d -> %d splits into %d seqs", nSeqs, nSplits, splitSeqs)); + + int[] splitIndices = new int[nSplits]; + for (int i = 0; i < nSplits; i++) + { + splitIndices[i] = splitSeqs * (i + 1); //exclusive!! + } + + HashMap valuesForScores = splitCombineAndAlign(seqs.getAlignment().getSequencesArray(), splitIndices, splitSeqs); + + for (int[] coords : valuesForScores.keySet()) + { + scores[coords[0]][coords[1]] = valuesForScores.get(coords); + } +pairwiseScores = new Matrix(scores); +pairwiseScores.print(System.out, "%1.4f "); + +/* alignment = new PairwiseAlignPanel(seqs, true, 100, 5); float[][] scores = alignment.getAlignmentScores(); //bigger index first -- eg scores[14][13] - Hashtable connectivity = seqs.calculateConnectivity(scores, dim); + //Hashtable connectivity = seqs.calculateConnectivity(scores, dim); pairwiseScores = new Matrix(scores); +pairwiseScores.print(System.out, "%1.4f "); +/* pairwiseScores.fillDiagonal(); eigenMatrix = pairwiseScores.copy(); ccAnalysis cc = new ccAnalysis(pairwiseScores, dim); eigenMatrix = cc.run(); +*/ } catch (Exception q) { @@ -225,6 +262,163 @@ public class PaSiMap implements Runnable } /** + * aligns sequences in splits + * Splits each split into halves and aligns them against halves of 
other splits + * + * @param seqs + * @param i ~ indices of split + * @param s ~ sequences per split + * + * @return a map of where to put in scores, value ~ scores[n][m] = v + **/ + protected HashMap splitCombineAndAlign(SequenceI[] seqArray, int[] i, int s) + { + HashMap result = new HashMap(); + + int[][] allGroups = new int[i.length][s]; + for (int g = 0; g < i.length; g++) // group g + { + int e = 0; // index going through allGroups[][e] + for (int j = g * s; j < i[g]; j++) // goes through all numbers in one group + { + allGroups[g][e++] = j >= seqArray.length ? -1 : j; + } + } + + int g = 0; // group count + for (int[] group : allGroups) + { + HashSet sg = new HashSet(); + //SequenceGroup sg = new SequenceGroup(); + for (int index : group) + { + if (index == -1) + continue; + //sg.addSequence(seqArray[index], false); + sg.add(seqArray[index]); + } + SequenceI[] sgArray = new SequenceI[sg.size()]; + int k = 0; + for (SequenceI seq : sg) + { + sgArray[k++] = seq; + } + //seqs.setSelectionGroup(sg); + //PairwiseAlignPanel pap = new PairwiseAlignPanel(seqs, true, 100, 5); + //float[][] scores = pap.getAlignmentScores(); //bigger index first -- eg scores[14][13] + float[][] scores = simulateAlignment(sgArray); + for (int s1 = 0; s1 < scores.length; s1++) // row + { + result.put(new int[]{s1 + g * s, s1 + g * s}, Float.NaN); // self score = Float.NaN + for (int s2 = 0; s2 < s1; s2++) // col + { + result.put(new int[]{s1 + g * s, s2 + g * s}, scores[s1][s2]); + } + } + g++; + } + + int smallS = (int) Math.ceil((float) s/2); + int[][] newGroups = new int[i.length * 2][smallS]; + + g = 0; + for (int[] group : allGroups) + { + int[] split1 = new int[smallS]; + int[] split2 = new int[smallS]; + for (int k = 0; k < group.length; k++) + { + if (k < smallS) + split1[k] = group[k]; + else + split2[k - smallS] = group[k]; + } + newGroups[g++] = split1; + newGroups[g++] = split2; + } + + // align each subsplit with subsplits from other split groups + for (int subsplitN = 0; 
subsplitN < newGroups.length; subsplitN++) + { + int c = 1; // current split block + while (newGroups[subsplitN][0] > smallS * c) + { + c++; + } + for (int nextSplit = subsplitN + 1; nextSplit < newGroups.length; nextSplit++) + { + if (newGroups[nextSplit][0] >= s * c) // if next subsplit of next split group -> align seqs + { + HashSet sg = new HashSet(); + //SequenceGroup sg = new SequenceGroup(); + for (int index : newGroups[subsplitN]) + { + if (index == -1) + continue; + //sg.addSequence(seqArray[index], false); + sg.add(seqArray[index]); + } + for (int index : newGroups[nextSplit]) + { + if (index == -1) + continue; + //sg.addSequence(seqArray[index], false); + sg.add(seqArray[index]); + } + SequenceI[] sgArray = new SequenceI[sg.size()]; + int k = 0; + for (SequenceI seq : sg) + { + sgArray[k++] = seq; + } + //seqs.setSelectionGroup(sg); + //PairwiseAlignPanel pap = new PairwiseAlignPanel(seqs, true, 100, 5); + //float[][] scores = pap.getAlignmentScores(); //bigger index first -- eg scores[14][13] + float[][] scores = simulateAlignment(sgArray); + for (int s1 = 0; s1 < scores.length; s1++) // row + { + for (int s2 = 0; s2 < s1; s2++) // col + { + if (s1 >= smallS && s2 < smallS) + result.put(new int[]{s1 + (nextSplit-1) * smallS, s2 + subsplitN * smallS}, scores[s1][s2]); + } + } + } + } + } + + return result; + } + + /** + * simulate the alignment of a PairwiseAlignPanel + * + * @param seqs + * @return alignment scores + */ + protected float[][] simulateAlignment(SequenceI[] seqs) + { + float[][] result = new float[seqs.length][seqs.length]; + for (int i = 1; i < seqs.length; i++) + { + for (int j = 0; j < i; j++) + { + String[] seqStrings = new String[2]; + seqStrings[0] = seqs[i].getSequenceAsString(); + seqStrings[1] = seqs[j].getSequenceAsString(); + + AlignSeq as = new AlignSeq(seqs[i], seqStrings[0], seqs[j], seqStrings[1], AlignSeq.PEP); + as.calcScoreMatrix(); + as.traceAlignmentWithEndGaps(); + as.scoreAlignment(); + as.printAlignment(System.out); 
+ result[i][j] = as.getAlignmentScore(); + } + } + return result; + } + + /** * Returns a PrintStream that wraps (appends its output to) the given * StringBuilder * diff --git a/src/jalview/gui/PairwiseAlignPanel.java b/src/jalview/gui/PairwiseAlignPanel.java index a84c449..fc862fe 100755 --- a/src/jalview/gui/PairwiseAlignPanel.java +++ b/src/jalview/gui/PairwiseAlignPanel.java @@ -104,6 +104,7 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel double totscore = 0D; int count = seqs.length; boolean first = true; + //AlignSeq as = new AlignSeq(seqs[1], seqStrings[1], seqs[0], seqStrings[0], type, gapOpenCost, gapExtendCost); for (int i = 1; i < count; i++) { @@ -113,6 +114,7 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel { AlignSeq as = new AlignSeq(seqs[i], seqStrings[i], seqs[j], seqStrings[j], type, gapOpenCost, gapExtendCost); +// as.seqInit(seqs[i], seqStrings[i], seqs[j], seqStrings[j], type); if (as.s1str.length() == 0 || as.s2str.length() == 0) { -- 1.7.10.2