From d5bcf8b86c585efd80a6658bd56b656373c34295 Mon Sep 17 00:00:00 2001 From: MorellThomas Date: Sat, 22 Jul 2023 15:29:51 +0200 Subject: [PATCH] Implement MSA, scoring and counting the connections --- src/jalview/analysis/AlignSeq.java | 1 + src/jalview/analysis/Connectivity.java | 87 +++++++++++++++++++++++ src/jalview/analysis/ConnectivityException.java | 57 +++++++++++++++ src/jalview/analysis/PaSiMap.java | 73 +++++++++++++++++-- src/jalview/datamodel/AlignmentView.java | 1 + src/jalview/gui/CalculationChooser.java | 2 +- src/jalview/gui/PaSiMapPanel.java | 11 ++- src/jalview/gui/PairwiseAlignPanel.java | 9 +++ src/jalview/viewmodel/AlignmentViewport.java | 6 ++ src/jalview/viewmodel/PaSiMapModel.java | 17 +++-- 10 files changed, 249 insertions(+), 15 deletions(-) create mode 100644 src/jalview/analysis/Connectivity.java create mode 100644 src/jalview/analysis/ConnectivityException.java diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 65fd110..10c7e6d 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -854,6 +854,7 @@ public class AlignSeq * @param type * AlignSeq.DNA or AlignSeq.PEP */ + //&! 1 v 1 needleman public static AlignSeq doGlobalNWAlignment(SequenceI s1, SequenceI s2, String type) { diff --git a/src/jalview/analysis/Connectivity.java b/src/jalview/analysis/Connectivity.java new file mode 100644 index 0000000..25f9626 --- /dev/null +++ b/src/jalview/analysis/Connectivity.java @@ -0,0 +1,87 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +//import jalview.datamodel.AlignmentView; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; +import jalview.viewmodel.AlignmentViewport; + +import java.util.Hashtable; + +/** + * @Author MorellThomas + */ + +public class Connectivity +{ + + /** + * Returns the number of unique connections for each sequence + * only connections with a score of above 0 count + * + * @param av sequences + * @param scores alignment scores + * + * @return connectivity + */ + public static Hashtable getConnectivity(AlignmentViewport av, float[][] scores, byte dim) throws RuntimeException + { + SequenceI[] sequences = av.getAlignment().getSequencesArray(); + + Hashtable connectivity = new Hashtable(); + // for each unique connection + for (int i = 0; i < sequences.length; i++) + { + connectivity.putIfAbsent(sequences[i], 0); + for (int j = 0; j < i; j++) + { + connectivity.putIfAbsent(sequences[j], 0); + int iOld = connectivity.get(sequences[i]); + int jOld = connectivity.get(sequences[j]); + // count the connection if its score is bigger than 0.1 + if (scores[i][j] >= 0.1) // what is cutoff instead of just 0.1? + { + connectivity.put(sequences[i], ++iOld); + connectivity.put(sequences[j], ++jOld); + } + } + } + //Debug + for (int i = 0; i < sequences.length; i++) + { + System.out.println(connectivity.get(sequences[i])); + } + + // if a sequence has too few connections, abort + connectivity.forEach((sequence, connection) -> + { + if (connection < dim) + { + // a popup saying that it failed would be nice + throw new ConnectivityException(sequence.getName(), connection, dim); + } + } ); + + return connectivity; + } + +} diff --git a/src/jalview/analysis/ConnectivityException.java b/src/jalview/analysis/ConnectivityException.java new file mode 100644 index 0000000..9915f2a --- /dev/null +++ b/src/jalview/analysis/ConnectivityException.java @@ -0,0 +1,57 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +public class ConnectivityException extends RuntimeException +{ + private String sequence; + private int connection; + private byte dim; + + public ConnectivityException(String sequence, int connection, byte dim) + { + this("Insufficient number of connections", sequence, connection, dim); + } + + public ConnectivityException(String message, String sequence, int connection, byte dim) + { + super(String.format("%s for %s (%d, should be %d or more)", message, sequence, connection, dim)); + this.sequence = sequence; + this.connection = connection; + this.dim = dim; + } + + public String getSequence() + { + return sequence; + } + + public int getConnection() + { + return connection; + } + + public byte getDim() + { + return dim; + } + +} diff --git a/src/jalview/analysis/PaSiMap.java b/src/jalview/analysis/PaSiMap.java index 7714be4..d0362b6 100755 --- a/src/jalview/analysis/PaSiMap.java +++ b/src/jalview/analysis/PaSiMap.java @@ -25,24 +25,34 @@ import jalview.api.analysis.SimilarityParamsI; import jalview.bin.Console; import jalview.datamodel.AlignmentView; import jalview.datamodel.Point; +import jalview.datamodel.SequenceI; +import jalview.gui.PairwiseAlignPanel; +import jalview.gui.PaSiMapPanel; import jalview.math.MatrixI; +import jalview.viewmodel.AlignmentViewport; + import java.io.PrintStream; +import java.util.Hashtable; +import java.util.Enumeration; /** * Performs Principal Component Analysis on given sequences + * @AUTHOR MorellThomas */ public class PaSiMap implements Runnable { /* * inputs */ - final private AlignmentView seqs; + final private AlignmentViewport seqs; final private ScoreModelI scoreModel; final private SimilarityParamsI similarityParams; + final private byte dim = 3; + /* * outputs */ @@ -60,7 +70,9 @@ public class PaSiMap implements Runnable * @param sm * @param options */ - public PaSiMap(AlignmentView sequences, ScoreModelI sm, + //public PaSiMap(AlignmentView sequences, ScoreModelI sm, + //&! viewport or panel? + public PaSiMap(AlignmentViewport sequences, ScoreModelI sm, SimilarityParamsI options) { this.seqs = sequences; @@ -203,22 +215,69 @@ public class PaSiMap implements Runnable { try { + /** here goes pasimap + * FASTA_to_secureFASTA + sed s/ /_/g ~~ aliases = 1st word of secure fasta header + * FASTA_to_stats ~~ unique_headers_count, unique_bodies_count + check: unique_bodies_min, dim_max + * if unaligned : EMBOSS needlall ~~ alignments.fas + * elseif aligned : MSA_to_pairwiseFASTA + * pairwiseFASTA_to_pairwiseCSV + * pairwise_to_connectivity ~~ connectivity.csv + warn if sequence has #connections >= dim + * pairwiseCSV_to_pairwiseQuantifier (use scoreModel) + label results + * cc_analysis quantifier.ssv ~~ vec.ssv + label results + * plot + * + * ###################### + * + * input AlignemtView seqs ~~ check if aligned + * if not ~~ perform needlall (or try whatever is in there) + * calculate connectivity + check if fine + * quantify alignment with ScoreModelI scoreModel + * cc_analysis with quantification + * plot whatever comes out of cc_analysis + */ + + /* + bool aligned = seqs.checkoridk; + if (!aligned) + { + seqs = needlall(seqs); + } + + connectivity + + pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); + + cc_analysis + */ + + // run needleman regardless if aligned or not + // gui.PairwiseAlignPanel <++> + PairwiseAlignPanel alignment = new PairwiseAlignPanel(seqs); + float[][] scores = alignment.getScores(); + System.out.println(scores[8][18]); // do scores have to be divided by totscore?? + + // what is connectivity and why? + Hashtable connectivity = seqs.calculateConnectivity(scores, dim); + + + /** pca code */ /* * sequence pairwise similarity scores */ - pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); + //&! analysis/scoremodels/ScoreMatrix computeSimilarity + //pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); /* * tridiagonal matrix */ - tridiagonal = pairwiseScores.copy(); - tridiagonal.tred(); + //tridiagonal = pairwiseScores.copy(); + //tridiagonal.tred(); /* * the diagonalization matrix */ - eigenMatrix = tridiagonal.copy(); - eigenMatrix.tqli(); + //eigenMatrix = tridiagonal.copy(); + //eigenMatrix.tqli(); } catch (Exception q) { Console.error("Error computing PaSiMap: " + q.getMessage()); diff --git a/src/jalview/datamodel/AlignmentView.java b/src/jalview/datamodel/AlignmentView.java index e6604d1..8b2a5c3 100644 --- a/src/jalview/datamodel/AlignmentView.java +++ b/src/jalview/datamodel/AlignmentView.java @@ -1106,6 +1106,7 @@ public class AlignmentView } } } + //&! AlignmentI visal = view.getVisibleAlignment('-'); if (visal != null) { diff --git a/src/jalview/gui/CalculationChooser.java b/src/jalview/gui/CalculationChooser.java index 9e596e2..270383b 100644 --- a/src/jalview/gui/CalculationChooser.java +++ b/src/jalview/gui/CalculationChooser.java @@ -28,6 +28,7 @@ import jalview.api.analysis.SimilarityParamsI; import jalview.bin.Cache; import jalview.datamodel.SequenceGroup; import jalview.util.MessageManager; +import jalview.viewmodel.AlignmentViewport; import java.awt.BorderLayout; import java.awt.Color; @@ -656,7 +657,6 @@ public class CalculationChooser extends JPanel /* * construct the panel and kick off its calculation thread */ - //&! change to PaSiMapPanel pasimapPanel = new PaSiMapPanel(af.alignPanel, modelName, params); new Thread(pasimapPanel).start(); diff --git a/src/jalview/gui/PaSiMapPanel.java b/src/jalview/gui/PaSiMapPanel.java index e394781..62a0c80 100644 --- a/src/jalview/gui/PaSiMapPanel.java +++ b/src/jalview/gui/PaSiMapPanel.java @@ -109,6 +109,7 @@ public class PaSiMapPanel extends GPCAPanel boolean selected = av.getSelectionGroup() != null && av.getSelectionGroup().getSize() > 0; + //&! do i need seqstrings? AlignmentView seqstrings = av.getAlignmentView(selected); SequenceI[] seqs; if (!selected) @@ -123,7 +124,9 @@ public class PaSiMapPanel extends GPCAPanel ScoreModelI scoreModel = ScoreModels.getInstance() .getScoreModel(modelName, ap); setPasimapModel( - new PaSiMapModel(seqstrings, seqs, nucleotide, scoreModel, params)); + //&! + //new PaSiMapModel(seqstrings, seqs, nucleotide, scoreModel, params)); + new PaSiMapModel(av, seqs, nucleotide, scoreModel, params)); PaintRefresher.Register(this, av.getSequenceSetId()); setRotatableCanvas(new RotatableCanvas(alignPanel)); @@ -318,7 +321,7 @@ public class PaSiMapPanel extends GPCAPanel } Object[] alAndColsel = getPasimapModel().getInputData() - .getAlignmentAndHiddenColumns(gc); + .getAlignmentView(false).getAlignmentAndHiddenColumns(gc); if (alAndColsel != null && alAndColsel[0] != null) { @@ -712,7 +715,9 @@ public class PaSiMapPanel extends GPCAPanel * * @param data */ - public void setInputData(AlignmentView data) + //public void setInputData(AlignmentView data) + //&! viewport or panel? + public void setInputData(AlignmentViewport data) { getPasimapModel().setInputData(data); originalSeqData.setVisible(data != null); diff --git a/src/jalview/gui/PairwiseAlignPanel.java b/src/jalview/gui/PairwiseAlignPanel.java index c4b5367..c981b4c 100755 --- a/src/jalview/gui/PairwiseAlignPanel.java +++ b/src/jalview/gui/PairwiseAlignPanel.java @@ -43,6 +43,8 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel private static final String DASHES = "---------------------\n"; + private float[][] scores; + AlignmentViewport av; Vector sequences; @@ -118,12 +120,19 @@ public class PairwiseAlignPanel extends GPairwiseAlignPanel } } + this.scores = scores; + if (count > 2) { printScoreMatrix(seqs, scores, totscore); } } + public float[][] getScores() + { + return this.scores; + } + /** * Prints a matrix of seqi-seqj pairwise alignment scores to sysout * diff --git a/src/jalview/viewmodel/AlignmentViewport.java b/src/jalview/viewmodel/AlignmentViewport.java index 08af2ec..8402b32 100644 --- a/src/jalview/viewmodel/AlignmentViewport.java +++ b/src/jalview/viewmodel/AlignmentViewport.java @@ -21,6 +21,7 @@ package jalview.viewmodel; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; +import jalview.analysis.Connectivity; import jalview.analysis.Conservation; import jalview.analysis.TreeModel; import jalview.api.AlignCalcManagerI; @@ -3096,4 +3097,9 @@ public abstract class AlignmentViewport return (alignment.getHiddenColumns().getVisContigsIterator(start, end, false)); } + + public Hashtable calculateConnectivity(float[][] scores, byte dim) + { + return Connectivity.getConnectivity(this, scores, dim); + } } diff --git a/src/jalview/viewmodel/PaSiMapModel.java b/src/jalview/viewmodel/PaSiMapModel.java index 94485a3..c9545a8 100644 --- a/src/jalview/viewmodel/PaSiMapModel.java +++ b/src/jalview/viewmodel/PaSiMapModel.java @@ -28,6 +28,7 @@ import jalview.datamodel.AlignmentView; import jalview.datamodel.Point; import jalview.datamodel.SequenceI; import jalview.datamodel.SequencePoint; +import jalview.viewmodel.AlignmentViewport; import java.util.List; import java.util.Vector; @@ -37,7 +38,9 @@ public class PaSiMapModel /* * inputs */ - private AlignmentView inputData; + //private AlignmentView inputData; + //&! + private AlignmentViewport inputData; private final SequenceI[] seqs; @@ -69,7 +72,9 @@ public class PaSiMapModel * @param modelName * @param params */ - public PaSiMapModel(AlignmentView seqData, SequenceI[] sqs, boolean nuc, + //public PaSiMapModel(AlignmentView seqData, SequenceI[] sqs, boolean nuc, + //&! viewport or panel? + public PaSiMapModel(AlignmentViewport seqData, SequenceI[] sqs, boolean nuc, ScoreModelI modelName, SimilarityParamsI params) { inputData = seqData; @@ -166,12 +171,16 @@ public class PaSiMapModel return pasimap.getDetails(); } - public AlignmentView getInputData() + //&! + //public AlignmentView getInputData() + public AlignmentViewport getInputData() { return inputData; } - public void setInputData(AlignmentView data) + //&! + //public void setInputData(AlignmentView data) + public void setInputData(AlignmentViewport data) { inputData = data; } -- 1.7.10.2