--- /dev/null
+package fr.orsay.lri.varna.models.treealign;
+import java.util.ArrayList;
+
+
+
+/**
+ *
+ *
+ * @author Raphael Champeimont
+ */
+public class ExampleDistance3 implements TreeAlignLabelDistanceSymmetric<RNANodeValue2> {
+ public double f(RNANodeValue2 v1, RNANodeValue2 v2) {
+ if (v1 == null) {
+ if (v2 == null) {
+ return 0;
+ } else if (!v2.isSingleNode()) { // v2 is a list of bases
+ // We insert all bases, with a cost of 1 for each base.
+ return v2.getNodes().size();
+ } else { // v2 is a single node
+ return 2;
+ }
+ } else if (!v1.isSingleNode()) { // v1 is a list of bases
+ if (v2 == null) {
+ return v1.getNodes().size();
+ } else if (!v2.isSingleNode()) { // v2 is a list of bases
+ // We compute the sequence distance
+ return alignSequenceNodes(v1, v2).getDistance();
+ } else { // v2 is a single node
+ return 2 + v1.getNodes().size();
+ }
+ } else { // v1 is a single node
+ // all the same as when v1 == null
+ if (v2 == null) {
+ return 2;
+ } else if (!v2.isSingleNode()) { // v2 is a list of bases
+ return 2 + v2.getNodes().size();
+ } else { // v2 is a single node
+ String l1 = v1.getNode().getLeftNucleotide();
+ String r1 = v1.getNode().getRightNucleotide();
+ String l2 = v2.getNode().getLeftNucleotide();
+ String r2 = v2.getNode().getRightNucleotide();
+ // We have cost(subst((x,y) to (x',y'))) = 1
+ // when x != x' and y != y'.
+ // It means it is less than substituting 2 non-paired bases
+ return (!l1.equals(l2) ? 0.5 : 0)
+ + (!r1.equals(r2) ? 0.5 : 0);
+ }
+ }
+ }
+
+
+ public class SequenceAlignResult {
+ private double distance;
+ private int[][] alignment;
+
+ public double getDistance() {
+ return distance;
+ }
+ public void setDistance(double distance) {
+ this.distance = distance;
+ }
+
+ /** The result array is a matrix of height 2
+ * and width at most length(sequence A) + length(sequence B).
+ * with result[0] is the alignment for A
+ * and result[1] the alignment for B.
+ * The alignment consists int the indexes of the original
+ * bases positions, with -1 when there is no match.
+ */
+ public int[][] getAlignment() {
+ return alignment;
+ }
+ public void setAlignment(int[][] alignment) {
+ this.alignment = alignment;
+ }
+
+ }
+
+ /**
+ * Align two sequences contained in nodes.
+ * Both nodes have to be non-single nodes, otherwise an
+ * RNANodeValue2WrongTypeException exception will be thrown.
+ */
+ public SequenceAlignResult alignSequenceNodes(RNANodeValue2 v1, RNANodeValue2 v2) {
+ char[] A = v1.computeSequence();
+ char[] B = v2.computeSequence();
+ return alignSequences(A, B);
+ }
+
+ /**
+ * Align sequences using the Needleman-Wunsch algorithm.
+ * Time: O(A.length * B.length)
+ * Space: O(A.length * B.length)
+ * Space used by the returned object: O(A.length + B.length)
+ */
+ public SequenceAlignResult alignSequences(char[] A, char[] B) {
+ SequenceAlignResult result = new SequenceAlignResult();
+
+ final int la = A.length;
+ final int lb = B.length;
+ double[][] F = new double[la+1][lb+1];
+ int[][] decisions = new int[la+1][lb+1];
+ final double d = 1; // insertion/deletion cost
+ final double substCost = 1; // substitution cost
+ for (int i=0; i<=la; i++)
+ F[i][0] = d*i;
+ for (int j=0; j<=lb; j++)
+ F[0][j] = d*j;
+ for (int i=1; i<=la; i++)
+ for (int j=1; j<=lb; j++)
+ {
+ double min;
+ int decision;
+ double match = F[i-1][j-1] + (A[i-1] == B[j-1] ? 0 : substCost);
+ double delete = F[i-1][j] + d;
+ if (match < delete) {
+ decision = 1;
+ min = match;
+ } else {
+ decision = 2;
+ min = delete;
+ }
+ double insert = F[i][j-1] + d;
+ if (insert < min) {
+ decision = 3;
+ min = insert;
+ }
+ F[i][j] = min;
+ decisions[i][j] = decision;
+ }
+
+ result.setDistance(F[la][lb]);
+
+ int[][] alignment = computeAlignment(F, decisions, A, B);
+ result.setAlignment(alignment);
+
+ return result;
+ }
+
+ private int[][] computeAlignment(double[][] F, int[][] decisions, char[] A, char[] B) {
+ // At worst the alignment will be of length (A.length + B.length)
+ ArrayList<Integer> AlignmentA = new ArrayList<Integer>(A.length + B.length);
+ ArrayList<Integer> AlignmentB = new ArrayList<Integer>(A.length + B.length);
+ int i = A.length;
+ int j = B.length;
+ while (i > 0 && j > 0)
+ {
+ int decision = decisions[i][j];
+ switch (decision) {
+ case 1:
+ AlignmentA.add(i-1);
+ AlignmentB.add(j-1);
+ i = i - 1;
+ j = j - 1;
+ break;
+ case 2:
+ AlignmentA.add(i-1);
+ AlignmentB.add(-1);
+ i = i - 1;
+ break;
+ case 3:
+ AlignmentA.add(-1);
+ AlignmentB.add(j-1);
+ j = j - 1;
+ break;
+ default:
+ throw (new Error("Bug in ExampleDistance3: decision = " + decision));
+ }
+ }
+ while (i > 0)
+ {
+ AlignmentA.add(i-1);
+ AlignmentB.add(-1);
+ i = i - 1;
+ }
+ while (j > 0)
+ {
+ AlignmentA.add(-1);
+ AlignmentB.add(j-1);
+ j = j - 1;
+ }
+
+ // Convert the ArrayLists to the right format:
+ // We need to reverse the list and change the numbering of
+ int l = AlignmentA.size();
+ int[][] result = new int[2][l];
+ for (i=0; i<l; i++) {
+ result[0][i] = AlignmentA.get(l-1-i);
+ result[1][i] = AlignmentB.get(l-1-i);
+ }
+ return result;
+
+ }
+}