5fce12bf4d5eb275dcde03730325b65b2a07b028
[jalview.git] / srcjar / fr / orsay / lri / varna / applications / SecStrConsensus.java
1 package fr.orsay.lri.varna.applications;
2
3 import java.util.ArrayList;
4 import java.util.Hashtable;
5
6 import fr.orsay.lri.varna.models.rna.ModeleBP;
7
8 public class SecStrConsensus {
9         
10         
11         /**
12          * Internal class to represent a simple base-pair.
13          * @author Yawn
14          *
15          */
16         static class SimpleBP{
17                 int bp5;
18                 int bp3;
19                 
20                 public SimpleBP(int i5, int i3)
21                 {
22                         bp5=i5;
23                         bp3=i3;
24                 }
25         }
26         
27         public static int[] extractConsensus(ArrayList<ArrayList<SimpleBP>> bps)
28         {
29                 // We do not currently know the length of the alignment
30                 // => Estimate it as the biggest index of a base-pair plus one. 
31                 int maxlength = 0;
32                 for (ArrayList<SimpleBP> strs : bps)
33                 {
34                         for (SimpleBP bp : strs)
35                         {
36                                 maxlength = Math.max(1+Math.max(bp.bp5, bp.bp3), maxlength);
37                         }
38                 }
39                 // Now we have a good estimate for length, allocate and initialize data
40                 // to be fed to the dynamic programming procedure.
41                 ArrayList<Hashtable<Integer,Double>> seq = new ArrayList<Hashtable<Integer,Double>>();
42                 for (int i=0;i<seq.size();i++)
43                 { seq.add(new Hashtable<Integer,Double>()); }
44                 for (ArrayList<SimpleBP> strs : bps)
45                 {
46                         for (SimpleBP bp : strs)
47                         {
48                                 int i = bp.bp5;
49                                 int j = bp.bp3;
50                                 Hashtable<Integer,Double> h = seq.get(i);
51                                 if (!h.containsKey(j))
52                                 {
53                                         h.put(j, 0.0);
54                                 }
55                                 h.put(j, h.get(i)+1.);
56                         }
57                 }
58                 // At this point, seq contains, at each position i, a hashtable which associates,
59                 // to each possible end j, the number of time a base-pair (i,j) occurs in the alignment
60                 
61                 // We can now run the dynamic programming procedure on this data
62                 double[][] mat = fillMatrix(seq); 
63                 ArrayList<SimpleBP> res = backtrack(mat,seq);
64                 
65                 // Convert it to an array, ie finalres[i] = j >= 0 iff a base-pair (i,j) is present 
66                 // in the consensus, or -1 otherwise
67                 int[] finalres = new int[seq.size()];
68                 for (int i=0;i<seq.size();i++)
69                 { finalres[i] = -1; }
70                 for (SimpleBP bp : res)
71                 {
72                         finalres[bp.bp5] = bp.bp3;
73                         finalres[bp.bp3] = bp.bp5;
74                 }
75                 
76                 return finalres;
77         }
78         
79         private static boolean canBasePair(ArrayList<Hashtable<Integer,Double>> seq, int i, int k)
80         {
81                 return seq.get(i).containsKey(k);
82         }
83         
84         // Returns the score of a potential base-pair, ie the number of structures in which it is found.
85         private static double basePairScore(ArrayList<Hashtable<Integer,Double>> seq, int i, int k)
86         {
87                 return seq.get(i).get(k);       
88         }
89         
90         
91         private static double[][] fillMatrix(ArrayList<Hashtable<Integer,Double>> seq)
92         {
93                 int n = seq.size();
94                 double[][] tab = new double[n][n];
95                 for(int m=1;m<=n;m++)
96                 {
97                         for(int i=0;i<n-m+1;i++)
98                         {
99                                 int j = i+m-1;
100                                 tab[i][j] = 0;
101                                 if (i<j)
102                                 { 
103                                         tab[i][j] = Math.max(tab[i][j], tab[i+1][j]); 
104                                         for (int k=i+1;k<=j;k++)
105                                         {
106                                                 if (canBasePair(seq,i,k))
107                                                 {
108                                                         double fact1 = 0;
109                                                         if (k>i+1)
110                                                         {
111                                                                 fact1 = tab[i+1][k-1];
112                                                         }
113                                                         double fact2 = 0;
114                                                         if (k<j)
115                                                         {
116                                                                 fact2 = tab[k+1][j];
117                                                         }
118                                                         tab[i][j] = Math.max(tab[i][j],basePairScore(seq,i,k)+fact1+fact2);
119                                                 } 
120                                         }
121                                 }
122                         }                       
123                 }
124                 return tab;
125         }       
126         
127         private static  ArrayList<SimpleBP> backtrack(double[][] tab,ArrayList<Hashtable<Integer,Double>> seq)
128         {
129                 return backtrack(tab,seq,0,seq.size()-1);
130         }
131         
132         private static ArrayList<SimpleBP> backtrack(double[][] tab,ArrayList<Hashtable<Integer,Double>> seq, int i, int j)
133         {
134                 ArrayList<SimpleBP> result = new ArrayList<SimpleBP>();
135                 if (i<j)
136                 { 
137                         ArrayList<Integer> indices = new ArrayList<Integer>();
138                         indices.add(-1);
139                         for (int k=i+1;k<=j;k++)
140                         {
141                                 indices.add(k);
142                         }
143                         for (int k : indices)
144                         {
145                                 if (k==-1)
146                                 {
147                                         if (tab[i][j] == tab[i+1][j])
148                                         {
149                                                 result = backtrack(tab, seq, i+1,j);
150                                         }
151                                 }
152                                 else
153                                 {
154                                         if (canBasePair(seq,i,k))
155                                         {
156                                                 double fact1 = 0;
157                                                 if (k>i+1)
158                                                 {
159                                                         fact1 = tab[i+1][k-1];
160                                                 }
161                                                 double fact2 = 0;
162                                                 if (k<j)
163                                                 {
164                                                         fact2 = tab[k+1][j];
165                                                 }
166                                                 if (tab[i][j]==basePairScore(seq,i,k)+fact1+fact2)
167                                                 { 
168                                                         result = backtrack(tab, seq, i+1,k-1);
169                                                         result.addAll(backtrack(tab, seq, k+1,j));
170                                                         result.add(new SimpleBP(i,k));
171                                                 }
172                                         }                                       
173                                 }                               
174                         }
175                 }
176                 else if  (i==j)
177                 {
178                         
179                 }
180                 else 
181                 {
182                         
183                 }
184                 return result;
185         }
186         
187
188
189 }