JAL-3026 srcjar files for VARNA and log4j
[jalview.git] / srcjar / fr / orsay / lri / varna / models / templates / RNATemplateAlign.java
1 package fr.orsay.lri.varna.models.templates;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.HashSet;
6 import java.util.Iterator;
7 import java.util.LinkedList;
8 import java.util.List;
9 import java.util.Map;
10
11 import fr.orsay.lri.varna.exceptions.ExceptionInvalidRNATemplate;
12 import fr.orsay.lri.varna.models.rna.ModeleBaseNucleotide;
13 import fr.orsay.lri.varna.models.rna.ModeleBP;
14 import fr.orsay.lri.varna.models.rna.RNA;
15 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateElement;
16 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateHelix;
17 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateUnpairedSequence;
18 import fr.orsay.lri.varna.models.treealign.AlignedNode;
19 import fr.orsay.lri.varna.models.treealign.RNANodeValue;
20 import fr.orsay.lri.varna.models.treealign.RNANodeValue2;
21 import fr.orsay.lri.varna.models.treealign.RNATree2;
22 import fr.orsay.lri.varna.models.treealign.RNATree2Exception;
23 import fr.orsay.lri.varna.models.treealign.Tree;
24 import fr.orsay.lri.varna.models.treealign.TreeAlign;
25 import fr.orsay.lri.varna.models.treealign.TreeAlignException;
26 import fr.orsay.lri.varna.models.treealign.TreeAlignResult;
27
28 /**
29  * This class is about the alignment between a tree of RNANodeValue2
30  * and a tree of RNANodeValueTemplate.
31  * 
32  * @author Raphael Champeimont
33  */
34 public class RNATemplateAlign {
35         
36         // We check this node can be part of a non-broken helix.
37         private static boolean canBePartOfAnHelix(RNANodeValue2 leftNodeValue) {
38                 return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() > 0;
39         }
40         
41         private static boolean canBePartOfASequence(RNANodeValue2 leftNodeValue) {
42                 return (leftNodeValue != null) && !leftNodeValue.isSingleNode();
43         }
44         
45         private static boolean canBePartOfABrokenHelix(RNANodeValue2 leftNodeValue) {
46                 return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() < 0;
47         }
48         
49         
50         /**
51          * This method takes an alignment between a tree of RNANodeValue2
52          * of RNANodeValue and a tree of RNANodeValue2 of RNANodeValueTemplate,
53          * and the original RNA object that was used to create the first tree
54          * in the alignment.
55          * It returns the corresponding RNATemplateMapping.
56          */
57         public static RNATemplateMapping makeTemplateMapping(TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult, RNA rna) throws RNATemplateMappingException {
58                 RNATemplateMapping mapping = new RNATemplateMapping();
59                 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> alignment = alignResult.getAlignment();
60                 mapping.setDistance(alignResult.getDistance());
61                 
62                 // Map sequences and helices together, without managing pseudoknots
63                 {
64                         // We will go through the tree using a DFS
65                         // The reason why this algorithm is not trivial is that we may have
66                         // a longer helix on the RNA side than on the template side, in which
67                         // case some nodes on the RNA side are going to be alone while we
68                         // would want them to be part of the helix.
69                         RNATemplateHelix currentHelix = null;
70                         LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
71                         List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
72                         remainingNodes.add(alignment);
73                         while (!remainingNodes.isEmpty()) {
74                                 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
75                                 remainingNodes.removeLast();
76                                 
77                                 Tree<RNANodeValue2> leftNode = node.getValue().getLeftNode();
78                                 Tree<RNANodeValueTemplate> rightNode = node.getValue().getRightNode();
79                                 
80                                 // Do we have something on RNA side?
81                                 if (leftNode != null && leftNode.getValue() != null) {
82                                         RNANodeValue2 leftNodeValue = leftNode.getValue();
83                                         
84                                         // Do we have something on template side?
85                                         if (rightNode != null && rightNode.getValue() != null) {
86                                                 RNANodeValueTemplate rightNodeValue = rightNode.getValue();
87                                                 
88                                                 if (rightNodeValue instanceof RNANodeValueTemplateBasePair
89                                                                 && canBePartOfAnHelix(leftNodeValue)) {
90                                                         RNATemplateHelix helix = ((RNANodeValueTemplateBasePair) rightNodeValue).getHelix();
91                                                         currentHelix = helix;
92                                                         int i = leftNodeValue.getNode().getLeftBasePosition();
93                                                         int j = leftNodeValue.getNode().getRightBasePosition();
94                                                         mapping.addCouple(i, helix);
95                                                         mapping.addCouple(j, helix);
96                                                         
97                                                         // Maybe we have marked nodes as part of the same helix
98                                                         // when we didn't know yet which helix it was.
99                                                         if (nodesInSameHelix.size() > 0) {
100                                                                 for (RNANodeValue2 v: nodesInSameHelix) {
101                                                                         int k = v.getNode().getLeftBasePosition();
102                                                                         int l = v.getNode().getRightBasePosition();
103                                                                         // We want to check nodesInSameHelix is a parent helix and not a sibling.
104                                                                         boolean validExtension = (k < i) && (j < l);
105                                                                         if (validExtension) {
106                                                                                 mapping.addCouple(v.getNode().getLeftBasePosition(), helix);
107                                                                                 mapping.addCouple(v.getNode().getRightBasePosition(), helix);
108                                                                         }       
109                                                                 }
110                                                         }
111                                                         nodesInSameHelix.clear();
112                                                         
113                                                 } else if (rightNodeValue instanceof RNANodeValueTemplateSequence
114                                                                 && canBePartOfASequence(leftNodeValue)) {
115                                                         currentHelix = null;
116                                                         nodesInSameHelix.clear();
117                                                         RNATemplateUnpairedSequence sequence = ((RNANodeValueTemplateSequence) rightNodeValue).getSequence();
118                                                         for (RNANodeValue nodeValue: leftNode.getValue().getNodes()) {
119                                                                 mapping.addCouple(nodeValue.getLeftBasePosition(), sequence);
120                                                         }
121                                                 } else {
122                                                         // Pseudoknot in template
123                                                         currentHelix = null;
124                                                         nodesInSameHelix.clear();
125                                                 }
126                                                 
127                                         } else {
128                                                 // We have nothing on template side
129                                                 
130                                                 if (canBePartOfAnHelix(leftNodeValue)) {
131                                                         if (currentHelix != null) {
132                                                                 // We may be in this case when the RNA
133                                                                 // contains a longer helix than in the template 
134                                                                 int i = leftNodeValue.getNode().getLeftBasePosition();
135                                                                 int j = leftNodeValue.getNode().getRightBasePosition();
136                                                                 int k = Integer.MAX_VALUE;
137                                                                 int l = Integer.MIN_VALUE;
138                                                                 for (int b: mapping.getAncestor(currentHelix)) {
139                                                                         k = Math.min(k, b);
140                                                                         l = Math.max(l, b);
141                                                                 }
142                                                                 // We want to check currentHelix is a parent helix and not a sibling.
143                                                                 boolean validExtension = (k < i) && (j < l);
144                                                                 if (validExtension) {
145                                                                         mapping.addCouple(i, currentHelix);
146                                                                         mapping.addCouple(j, currentHelix);
147                                                                 }       
148                                                         } else {
149                                                                 // Maybe this left node is part of an helix
150                                                                 // which is smaller in the template
151                                                                 nodesInSameHelix.add(leftNodeValue);
152                                                         }
153                                                 }
154                                         }
155                                 }
156                                 
157                                 
158                                 // If this node has children, add them in the stack
159                                 List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
160                                 int n = children.size();
161                                 if (n > 0) {
162                                         int helixChildren = 0;
163                                         // For each subtree, we want the sequences (in RNA side) to be treated first
164                                         // and then the helix nodes, because finding an aligned sequence tells
165                                         // us we cannot grow a current helix
166                                         ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack1 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
167                                         ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack2 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
168                                         for (int i=0; i<n; i++) {
169                                                 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child = children.get(i);
170                                                 Tree<RNANodeValue2> RNAchild = child.getValue().getLeftNode();
171                                                 if (RNAchild != null
172                                                                 && RNAchild.getValue() != null
173                                                                 && (canBePartOfAnHelix(RNAchild.getValue()) || canBePartOfABrokenHelix(RNAchild.getValue()) )) {
174                                                         helixChildren++;
175                                                         addToStack2.add(child);
176                                                 } else {
177                                                         addToStack1.add(child);
178                                                 }
179                                         }
180                                         // We add the children in their reverse order so they
181                                         // are given in the original order by the iterator
182                                         for (int i=addToStack2.size()-1; i>=0; i--) {
183                                                 remainingNodes.add(addToStack2.get(i));
184                                         }
185                                         for (int i=addToStack1.size()-1; i>=0; i--) {
186                                                 remainingNodes.add(addToStack1.get(i));
187                                         }
188                                         if (helixChildren >= 2) {
189                                                 // We cannot "grow" the current helix, because we have a multiloop
190                                                 // in the RNA.
191                                                 currentHelix = null;
192                                                 //nodesInSameHelix.clear();
193                                         }
194                                 }
195                         }
196                 }
197                 
198                 
199                 // Now recover pseudoknots (broken helices)
200                 {
201                         // First create a temporary mapping with broken helices
202                         LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
203                         remainingNodes.add(alignment);
204                         RNATemplateMapping tempPKMapping = new RNATemplateMapping();
205                         while (!remainingNodes.isEmpty()) {
206                                 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
207                                 remainingNodes.removeLast();
208                                 List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
209                                 int n = children.size();
210                                 if (n > 0) {
211                                         for (int i=n-1; i>=0; i--) {
212                                                 // We add the children in their reverse order so they
213                                                 // are given in the original order by the iterator
214                                                 remainingNodes.add(children.get(i));
215                                         }
216                                         List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
217                                         RNATemplateHelix currentHelix = null;
218                                         for (Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child: node.getChildren()) {
219                                                 Tree<RNANodeValue2> leftNode = child.getValue().getLeftNode();
220                                                 Tree<RNANodeValueTemplate> rightNode = child.getValue().getRightNode();
221                                                 
222                                                 if (leftNode != null && leftNode.getValue() != null) {
223                                                         RNANodeValue2 leftNodeValue = leftNode.getValue();
224                                                         // We have a real left (RNA side) node
225                                                         
226                                                         if (rightNode != null && rightNode.getValue() != null) {
227                                                                 // We have a real right (template side) node
228                                                                 RNANodeValueTemplate rightNodeValue = rightNode.getValue();
229                                                                 
230                                                                 if (rightNodeValue instanceof RNANodeValueTemplateBrokenBasePair
231                                                                                 && canBePartOfABrokenHelix(leftNodeValue)) {
232                                                                         RNATemplateHelix helix = ((RNANodeValueTemplateBrokenBasePair) rightNodeValue).getHelix();
233                                                                         currentHelix = helix;
234                                                                         tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), helix);
235                                                                         
236                                                                         // Maybe we have marked nodes as part of the same helix
237                                                                         // when we didn't know yet which helix it was.
238                                                                         for (RNANodeValue2 v: nodesInSameHelix) {
239                                                                                 tempPKMapping.addCouple(v.getNode().getLeftBasePosition(), helix);
240                                                                         }
241                                                                         nodesInSameHelix.clear();
242                                                                 } else {
243                                                                         currentHelix = null;
244                                                                         nodesInSameHelix.clear();
245                                                                 }
246                                                         } else {
247                                                                 // We have no right (template side) node
248                                                                 if (canBePartOfABrokenHelix(leftNodeValue)) {
249                                                                         if (currentHelix != null) {
250                                                                                 // We may be in this case if the RNA sequence
251                                                                                 // contains a longer helix than in the template
252                                                                                 tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), currentHelix);
253                                                                         } else {
254                                                                                 // Maybe this left node is part of an helix
255                                                                                 // which is smaller in the template
256                                                                                 nodesInSameHelix.add(leftNodeValue);
257                                                                         }
258                                                                 } else {
259                                                                         currentHelix = null;
260                                                                         nodesInSameHelix.clear();
261                                                                 }
262                                                         }
263                                                 } else {
264                                                         currentHelix = null;
265                                                         nodesInSameHelix.clear();
266                                                 }
267                                         }
268                                 }
269                         }
270                         
271                         
272                         // As parts of broken helices were aligned independently,
273                         // we need to check for consistency, ie. keep only bases for
274                         // which the associated base is also aligned with the same helix.
275                         for (RNATemplateElement element: tempPKMapping.getTargetElemsAsSet()) {
276                                 RNATemplateHelix helix = (RNATemplateHelix) element;
277                                 HashSet<Integer> basesInHelix = new HashSet<Integer>(tempPKMapping.getAncestor(helix));
278                                 for (int baseIndex: basesInHelix) {
279                                         System.out.println("PK: " + helix + " aligned with " + baseIndex);
280                                         boolean baseOK = false;
281                                         // Search for an associated base aligned with the same helix
282                                         ArrayList<ModeleBP> auxBasePairs = rna.getAuxBPs(baseIndex);
283                                         for (ModeleBP auxBasePair: auxBasePairs) {
284                                                 int partner5 = ((ModeleBaseNucleotide) auxBasePair.getPartner5()).getIndex();
285                                                 int partner3 = ((ModeleBaseNucleotide) auxBasePair.getPartner3()).getIndex();
286                                                 if (baseIndex == partner5) {
287                                                         if (basesInHelix.contains(partner3)) {
288                                                                 baseOK = true;
289                                                                 break;
290                                                         }
291                                                 } else if (baseIndex == partner3) {
292                                                         if (basesInHelix.contains(partner5)) {
293                                                                 baseOK = true;
294                                                                 break;
295                                                         }
296                                                 }
297                                         }
298                                         if (baseOK) {
299                                                 // Add it to the real mapping
300                                                 mapping.addCouple(baseIndex, helix);
301                                         }
302                                 }
303                         }
304                 }
305                 
306                 
307                 return mapping;
308         }
309         
310         
311         
312         public static void printMapping(RNATemplateMapping mapping, RNATemplate template, String sequence) {
313                 Iterator<RNATemplateElement> iter = template.rnaIterator();
314                 while (iter.hasNext()) {
315                         RNATemplateElement element = iter.next();
316                         System.out.println(element.toString());
317                         ArrayList<Integer> A = mapping.getAncestor(element);
318                         if (A != null) {
319                                 RNATemplateAlign.printIntArrayList(A);
320                                 for (int n=A.size(), i=0; i<n; i++) {
321                                         System.out.print("\t" + sequence.charAt(A.get(i)));
322                                 }
323                                 System.out.println("");
324                         } else {
325                                 System.out.println("\tno match");
326                         }
327                 }
328         }
329         
330         
331         /**
332          * Align the given RNA with the given RNA template.
333          */
334         public static TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
335                 try {
336                         Tree<RNANodeValue2> rnaAsTree = RNATree2.RNATree2FromRNA(rna);
337                         Tree<RNANodeValueTemplate> templateAsTree = template.toTree();
338                         TreeAlign<RNANodeValue2,RNANodeValueTemplate> treeAlign = new TreeAlign<RNANodeValue2,RNANodeValueTemplate>(new RNANodeValue2TemplateDistance());
339                         TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> result = treeAlign.align(rnaAsTree, templateAsTree);
340                         return result;
341                 } catch (RNATree2Exception e) {
342                         throw (new RNATemplateDrawingAlgorithmException("RNATree2Exception: " + e.getMessage()));
343                 } catch (ExceptionInvalidRNATemplate e) {
344                         throw (new RNATemplateDrawingAlgorithmException("ExceptionInvalidRNATemplate: " + e.getMessage()));
345                 } catch (TreeAlignException e) {
346                         throw (new RNATemplateDrawingAlgorithmException("TreeAlignException: " + e.getMessage()));
347                 }
348         }
349         
350         /**
351          * Map an RNA with an RNATemplate using tree alignment.
352          */
353         public static RNATemplateMapping mapRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
354                 try {
355                         TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult = RNATemplateAlign.alignRNAWithTemplate(rna, template); 
356                         RNATemplateMapping mapping = RNATemplateAlign.makeTemplateMapping(alignResult, rna);
357                         return mapping;
358                 } catch (RNATemplateMappingException e) {
359                         e.printStackTrace();
360                         throw (new RNATemplateDrawingAlgorithmException("RNATemplateMappingException: " + e.getMessage()));
361                 }
362         }
363         
364         
365
366         /**
367          * Print an integer array.
368          */
369         public static void printIntArray(int[] A) {
370                 for (int i=0; i<A.length; i++) {
371                         System.out.print("\t" + A[i]);
372                 }
373                 System.out.println("");
374         }
375
376         /**
377          * Print an integer ArrayList.
378          */
379         public static void printIntArrayList(ArrayList<Integer> A) {
380                 for (int i=0; i<A.size(); i++) {
381                         System.out.print("\t" + A.get(i));
382                 }
383                 System.out.println("");
384         }
385
386         /**
387          * Print an matrix of shorts.
388          */
389         public static void printShortMatrix(short[][] M) {
390                 System.out.println("Begin matrix");
391                 for (int i=0; i<M.length; i++) {
392                         for (int j=0; j<M[i].length; j++) {
393                                 System.out.print("\t" + M[i][j]);
394                         }
395                         System.out.println("");
396                 }
397                 System.out.println("End matrix");
398         }
399         
400         /**
401          * Convert a list of integers into an array of integers.
402          * The returned arrays is freshly allocated.
403          * Returns null if given null.
404          */
405         public static int[] intArrayFromList(List<Integer> l) {
406                 if (l != null) {
407                         int n = l.size();
408                         int[] result = new int[n];
409                         for (int i=0; i<n; i++) {
410                                 result[i] = l.get(i);
411                         }
412                         return result;
413                 } else {
414                         return null;
415                 }
416         }
417         
418 }