JAL-3032 adds Java 8 functionality (2/2)
[jalview.git] / src2 / fr / orsay / lri / varna / models / treealign / ExampleDistance3.java
1 package fr.orsay.lri.varna.models.treealign;
2 import java.util.ArrayList;
3
4
5
6 /**
7  * 
8  * 
9  * @author Raphael Champeimont
10  */
11 public class ExampleDistance3 implements TreeAlignLabelDistanceSymmetric<RNANodeValue2> {
12         public double f(RNANodeValue2 v1, RNANodeValue2 v2) {
13                 if (v1 == null) {
14                         if (v2 == null) {
15                                 return 0;
16                         } else if (!v2.isSingleNode()) { // v2 is a list of bases
17                                 // We insert all bases, with a cost of 1 for each base.
18                                 return v2.getNodes().size();
19                         } else { // v2 is a single node
20                                 return 2;
21                         }
22                 } else if (!v1.isSingleNode()) { // v1 is a list of bases
23                         if (v2 == null) {
24                                 return v1.getNodes().size();
25                         } else if (!v2.isSingleNode()) { // v2 is a list of bases
26                                 // We compute the sequence distance
27                                 return alignSequenceNodes(v1, v2).getDistance();
28                         } else { // v2 is a single node
29                                 return 2 + v1.getNodes().size();
30                         }
31                 } else { // v1 is a single node
32                         // all the same as when v1 == null
33                         if (v2 == null) {
34                                 return 2;
35                         } else if (!v2.isSingleNode()) { // v2 is a list of bases
36                                 return 2 + v2.getNodes().size();
37                         } else { // v2 is a single node
38                                 String l1 = v1.getNode().getLeftNucleotide();
39                                 String r1 = v1.getNode().getRightNucleotide();
40                                 String l2 = v2.getNode().getLeftNucleotide();
41                                 String r2 = v2.getNode().getRightNucleotide();
42                                 // We have cost(subst((x,y) to (x',y'))) = 1
43                                 // when x != x' and y != y'.
44                                 // It means it is less than substituting 2 non-paired bases 
45                                 return (!l1.equals(l2) ? 0.5 : 0)
46                                      + (!r1.equals(r2) ? 0.5 : 0);
47                         }
48                 }
49         }
50         
51
52         public class SequenceAlignResult {
53                 private double distance;
54                 private int[][] alignment;
55                 
56                 public double getDistance() {
57                         return distance;
58                 }
59                 public void setDistance(double distance) {
60                         this.distance = distance;
61                 }
62                 
63                 /** The result array is a matrix of height 2
64                  * and width at most length(sequence A) + length(sequence B).
65                  * with result[0] is the alignment for A
66                  * and result[1] the alignment for B.
67                  * The alignment consists int the indexes of the original
68                  * bases positions, with -1 when there is no match.
69                  */
70                 public int[][] getAlignment() {
71                         return alignment;
72                 }
73                 public void setAlignment(int[][] alignment) {
74                         this.alignment = alignment;
75                 }
76                 
77         }
78         
79         /**
80          * Align two sequences contained in nodes.
81          * Both nodes have to be non-single nodes, otherwise an
82          * RNANodeValue2WrongTypeException exception will be thrown.
83          */
84         public SequenceAlignResult alignSequenceNodes(RNANodeValue2 v1, RNANodeValue2 v2) {
85                 char[] A = v1.computeSequence();
86                 char[] B = v2.computeSequence();
87                 return alignSequences(A, B);
88         }
89         
90         /**
91          * Align sequences using the Needleman-Wunsch algorithm.
92          * Time: O(A.length * B.length)
93          * Space: O(A.length * B.length)
94          * Space used by the returned object: O(A.length + B.length)
95          */
96         public SequenceAlignResult alignSequences(char[] A, char[] B) {
97                 SequenceAlignResult result = new SequenceAlignResult();
98                 
99                 final int la = A.length;
100                 final int lb = B.length;
101                 double[][] F = new double[la+1][lb+1];
102                 int[][] decisions = new int[la+1][lb+1];
103                 final double d = 1; // insertion/deletion cost
104                 final double substCost = 1; // substitution cost
105                 for (int i=0; i<=la; i++)
106                         F[i][0] = d*i;
107                 for (int j=0; j<=lb; j++)
108                         F[0][j] = d*j;
109                 for (int i=1; i<=la; i++)
110                         for (int j=1; j<=lb; j++)
111                         {
112                                 double min;
113                                 int decision;
114                                 double match = F[i-1][j-1] + (A[i-1] == B[j-1] ? 0 : substCost);
115                                 double delete = F[i-1][j] + d;
116                                 if (match < delete) {
117                                         decision = 1;
118                                         min = match;
119                                 } else {
120                                         decision = 2;
121                                         min = delete;
122                                 }
123                                 double insert = F[i][j-1] + d;
124                                 if (insert < min) {
125                                         decision = 3;
126                                         min = insert;
127                                 }
128                                 F[i][j] = min;
129                                 decisions[i][j] = decision;
130                         }
131                 
132                 result.setDistance(F[la][lb]);
133
134                 int[][] alignment = computeAlignment(F, decisions, A, B);
135                 result.setAlignment(alignment);
136                 
137                 return result;
138         }
139         
140         private int[][] computeAlignment(double[][] F, int[][] decisions, char[] A, char[] B) {
141                 // At worst the alignment will be of length (A.length + B.length)
142                 ArrayList<Integer> AlignmentA = new ArrayList<Integer>(A.length + B.length);
143                 ArrayList<Integer> AlignmentB = new ArrayList<Integer>(A.length + B.length);
144                 int i = A.length;
145                 int j = B.length;
146                 while (i > 0 && j > 0)
147                 {
148                         int decision = decisions[i][j];
149                         switch (decision) {
150                         case 1:
151                                 AlignmentA.add(i-1);
152                                 AlignmentB.add(j-1);
153                                 i = i - 1;
154                                 j = j - 1;
155                                 break;
156                         case 2:
157                                 AlignmentA.add(i-1);
158                                 AlignmentB.add(-1);
159                                 i = i - 1;
160                                 break;
161                         case 3:
162                                 AlignmentA.add(-1);
163                                 AlignmentB.add(j-1);
164                                 j = j - 1;
165                                 break;
166                         default:
167                                 throw (new Error("Bug in ExampleDistance3: decision = " + decision));
168                         }
169                 }
170                 while (i > 0)
171                 {
172                         AlignmentA.add(i-1);
173                         AlignmentB.add(-1);
174                         i = i - 1;
175                 }
176                 while (j > 0)
177                 {
178                         AlignmentA.add(-1);
179                         AlignmentB.add(j-1);
180                         j = j - 1;
181                 }
182
183                 // Convert the ArrayLists to the right format:
184                 // We need to reverse the list and change the numbering of
185                 int l = AlignmentA.size();
186                 int[][] result = new int[2][l];
187                 for (i=0; i<l; i++) {
188                         result[0][i] = AlignmentA.get(l-1-i);
189                         result[1][i] = AlignmentB.get(l-1-i);
190                 }
191                 return result;
192                 
193         }
194 }