X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=srcjar%2Ffr%2Forsay%2Flri%2Fvarna%2Fmodels%2Ftreealign%2FExampleDistance3.java;fp=srcjar%2Ffr%2Forsay%2Flri%2Fvarna%2Fmodels%2Ftreealign%2FExampleDistance3.java;h=cba2a8a5ff303ee425f348ebb4377831868103ef;hb=ec8f3cedf60fb1feed6d34de6b49f6bfa78b9dd8;hp=0000000000000000000000000000000000000000;hpb=056dad85a910551cc95e44d451a61f6b8c4dd35d;p=jalview.git diff --git a/srcjar/fr/orsay/lri/varna/models/treealign/ExampleDistance3.java b/srcjar/fr/orsay/lri/varna/models/treealign/ExampleDistance3.java new file mode 100644 index 0000000..cba2a8a --- /dev/null +++ b/srcjar/fr/orsay/lri/varna/models/treealign/ExampleDistance3.java @@ -0,0 +1,194 @@ +package fr.orsay.lri.varna.models.treealign; +import java.util.ArrayList; + + + +/** + * + * + * @author Raphael Champeimont + */ +public class ExampleDistance3 implements TreeAlignLabelDistanceSymmetric { + public double f(RNANodeValue2 v1, RNANodeValue2 v2) { + if (v1 == null) { + if (v2 == null) { + return 0; + } else if (!v2.isSingleNode()) { // v2 is a list of bases + // We insert all bases, with a cost of 1 for each base. + return v2.getNodes().size(); + } else { // v2 is a single node + return 2; + } + } else if (!v1.isSingleNode()) { // v1 is a list of bases + if (v2 == null) { + return v1.getNodes().size(); + } else if (!v2.isSingleNode()) { // v2 is a list of bases + // We compute the sequence distance + return alignSequenceNodes(v1, v2).getDistance(); + } else { // v2 is a single node + return 2 + v1.getNodes().size(); + } + } else { // v1 is a single node + // all the same as when v1 == null + if (v2 == null) { + return 2; + } else if (!v2.isSingleNode()) { // v2 is a list of bases + return 2 + v2.getNodes().size(); + } else { // v2 is a single node + String l1 = v1.getNode().getLeftNucleotide(); + String r1 = v1.getNode().getRightNucleotide(); + String l2 = v2.getNode().getLeftNucleotide(); + String r2 = v2.getNode().getRightNucleotide(); + // We have cost(subst((x,y) to (x',y'))) = 1 + // when x != x' and y != y'. + // It means it is less than substituting 2 non-paired bases + return (!l1.equals(l2) ? 0.5 : 0) + + (!r1.equals(r2) ? 0.5 : 0); + } + } + } + + + public class SequenceAlignResult { + private double distance; + private int[][] alignment; + + public double getDistance() { + return distance; + } + public void setDistance(double distance) { + this.distance = distance; + } + + /** The result array is a matrix of height 2 + * and width at most length(sequence A) + length(sequence B). + * with result[0] is the alignment for A + * and result[1] the alignment for B. + * The alignment consists int the indexes of the original + * bases positions, with -1 when there is no match. + */ + public int[][] getAlignment() { + return alignment; + } + public void setAlignment(int[][] alignment) { + this.alignment = alignment; + } + + } + + /** + * Align two sequences contained in nodes. + * Both nodes have to be non-single nodes, otherwise an + * RNANodeValue2WrongTypeException exception will be thrown. + */ + public SequenceAlignResult alignSequenceNodes(RNANodeValue2 v1, RNANodeValue2 v2) { + char[] A = v1.computeSequence(); + char[] B = v2.computeSequence(); + return alignSequences(A, B); + } + + /** + * Align sequences using the Needleman-Wunsch algorithm. + * Time: O(A.length * B.length) + * Space: O(A.length * B.length) + * Space used by the returned object: O(A.length + B.length) + */ + public SequenceAlignResult alignSequences(char[] A, char[] B) { + SequenceAlignResult result = new SequenceAlignResult(); + + final int la = A.length; + final int lb = B.length; + double[][] F = new double[la+1][lb+1]; + int[][] decisions = new int[la+1][lb+1]; + final double d = 1; // insertion/deletion cost + final double substCost = 1; // substitution cost + for (int i=0; i<=la; i++) + F[i][0] = d*i; + for (int j=0; j<=lb; j++) + F[0][j] = d*j; + for (int i=1; i<=la; i++) + for (int j=1; j<=lb; j++) + { + double min; + int decision; + double match = F[i-1][j-1] + (A[i-1] == B[j-1] ? 0 : substCost); + double delete = F[i-1][j] + d; + if (match < delete) { + decision = 1; + min = match; + } else { + decision = 2; + min = delete; + } + double insert = F[i][j-1] + d; + if (insert < min) { + decision = 3; + min = insert; + } + F[i][j] = min; + decisions[i][j] = decision; + } + + result.setDistance(F[la][lb]); + + int[][] alignment = computeAlignment(F, decisions, A, B); + result.setAlignment(alignment); + + return result; + } + + private int[][] computeAlignment(double[][] F, int[][] decisions, char[] A, char[] B) { + // At worst the alignment will be of length (A.length + B.length) + ArrayList AlignmentA = new ArrayList(A.length + B.length); + ArrayList AlignmentB = new ArrayList(A.length + B.length); + int i = A.length; + int j = B.length; + while (i > 0 && j > 0) + { + int decision = decisions[i][j]; + switch (decision) { + case 1: + AlignmentA.add(i-1); + AlignmentB.add(j-1); + i = i - 1; + j = j - 1; + break; + case 2: + AlignmentA.add(i-1); + AlignmentB.add(-1); + i = i - 1; + break; + case 3: + AlignmentA.add(-1); + AlignmentB.add(j-1); + j = j - 1; + break; + default: + throw (new Error("Bug in ExampleDistance3: decision = " + decision)); + } + } + while (i > 0) + { + AlignmentA.add(i-1); + AlignmentB.add(-1); + i = i - 1; + } + while (j > 0) + { + AlignmentA.add(-1); + AlignmentB.add(j-1); + j = j - 1; + } + + // Convert the ArrayLists to the right format: + // We need to reverse the list and change the numbering of + int l = AlignmentA.size(); + int[][] result = new int[2][l]; + for (i=0; i