1 package fr.orsay.lri.varna.models.templates;
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.HashSet;
6 import java.util.Iterator;
7 import java.util.LinkedList;
11 import fr.orsay.lri.varna.exceptions.ExceptionInvalidRNATemplate;
12 import fr.orsay.lri.varna.models.rna.ModeleBaseNucleotide;
13 import fr.orsay.lri.varna.models.rna.ModeleBP;
14 import fr.orsay.lri.varna.models.rna.RNA;
15 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateElement;
16 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateHelix;
17 import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateUnpairedSequence;
18 import fr.orsay.lri.varna.models.treealign.AlignedNode;
19 import fr.orsay.lri.varna.models.treealign.RNANodeValue;
20 import fr.orsay.lri.varna.models.treealign.RNANodeValue2;
21 import fr.orsay.lri.varna.models.treealign.RNATree2;
22 import fr.orsay.lri.varna.models.treealign.RNATree2Exception;
23 import fr.orsay.lri.varna.models.treealign.Tree;
24 import fr.orsay.lri.varna.models.treealign.TreeAlign;
25 import fr.orsay.lri.varna.models.treealign.TreeAlignException;
26 import fr.orsay.lri.varna.models.treealign.TreeAlignResult;
29 * This class is about the alignment between a tree of RNANodeValue2
30 * and a tree of RNANodeValueTemplate.
32 * @author Raphael Champeimont
34 public class RNATemplateAlign {
36 // We check this node can be part of a non-broken helix.
37 private static boolean canBePartOfAnHelix(RNANodeValue2 leftNodeValue) {
38 return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() > 0;
41 private static boolean canBePartOfASequence(RNANodeValue2 leftNodeValue) {
42 return (leftNodeValue != null) && !leftNodeValue.isSingleNode();
45 private static boolean canBePartOfABrokenHelix(RNANodeValue2 leftNodeValue) {
46 return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() < 0;
51 * This method takes an alignment between a tree of RNANodeValue2
52 * of RNANodeValue and a tree of RNANodeValue2 of RNANodeValueTemplate,
53 * and the original RNA object that was used to create the first tree
55 * It returns the corresponding RNATemplateMapping.
57 public static RNATemplateMapping makeTemplateMapping(TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult, RNA rna) throws RNATemplateMappingException {
58 RNATemplateMapping mapping = new RNATemplateMapping();
59 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> alignment = alignResult.getAlignment();
60 mapping.setDistance(alignResult.getDistance());
62 // Map sequences and helices together, without managing pseudoknots
64 // We will go through the tree using a DFS
65 // The reason why this algorithm is not trivial is that we may have
66 // a longer helix on the RNA side than on the template side, in which
67 // case some nodes on the RNA side are going to be alone while we
68 // would want them to be part of the helix.
69 RNATemplateHelix currentHelix = null;
70 LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
71 List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
72 remainingNodes.add(alignment);
73 while (!remainingNodes.isEmpty()) {
74 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
75 remainingNodes.removeLast();
77 Tree<RNANodeValue2> leftNode = node.getValue().getLeftNode();
78 Tree<RNANodeValueTemplate> rightNode = node.getValue().getRightNode();
80 // Do we have something on RNA side?
81 if (leftNode != null && leftNode.getValue() != null) {
82 RNANodeValue2 leftNodeValue = leftNode.getValue();
84 // Do we have something on template side?
85 if (rightNode != null && rightNode.getValue() != null) {
86 RNANodeValueTemplate rightNodeValue = rightNode.getValue();
88 if (rightNodeValue instanceof RNANodeValueTemplateBasePair
89 && canBePartOfAnHelix(leftNodeValue)) {
90 RNATemplateHelix helix = ((RNANodeValueTemplateBasePair) rightNodeValue).getHelix();
92 int i = leftNodeValue.getNode().getLeftBasePosition();
93 int j = leftNodeValue.getNode().getRightBasePosition();
94 mapping.addCouple(i, helix);
95 mapping.addCouple(j, helix);
97 // Maybe we have marked nodes as part of the same helix
98 // when we didn't know yet which helix it was.
99 if (nodesInSameHelix.size() > 0) {
100 for (RNANodeValue2 v: nodesInSameHelix) {
101 int k = v.getNode().getLeftBasePosition();
102 int l = v.getNode().getRightBasePosition();
103 // We want to check nodesInSameHelix is a parent helix and not a sibling.
104 boolean validExtension = (k < i) && (j < l);
105 if (validExtension) {
106 mapping.addCouple(v.getNode().getLeftBasePosition(), helix);
107 mapping.addCouple(v.getNode().getRightBasePosition(), helix);
111 nodesInSameHelix.clear();
113 } else if (rightNodeValue instanceof RNANodeValueTemplateSequence
114 && canBePartOfASequence(leftNodeValue)) {
116 nodesInSameHelix.clear();
117 RNATemplateUnpairedSequence sequence = ((RNANodeValueTemplateSequence) rightNodeValue).getSequence();
118 for (RNANodeValue nodeValue: leftNode.getValue().getNodes()) {
119 mapping.addCouple(nodeValue.getLeftBasePosition(), sequence);
122 // Pseudoknot in template
124 nodesInSameHelix.clear();
128 // We have nothing on template side
130 if (canBePartOfAnHelix(leftNodeValue)) {
131 if (currentHelix != null) {
132 // We may be in this case when the RNA
133 // contains a longer helix than in the template
134 int i = leftNodeValue.getNode().getLeftBasePosition();
135 int j = leftNodeValue.getNode().getRightBasePosition();
136 int k = Integer.MAX_VALUE;
137 int l = Integer.MIN_VALUE;
138 for (int b: mapping.getAncestor(currentHelix)) {
142 // We want to check currentHelix is a parent helix and not a sibling.
143 boolean validExtension = (k < i) && (j < l);
144 if (validExtension) {
145 mapping.addCouple(i, currentHelix);
146 mapping.addCouple(j, currentHelix);
149 // Maybe this left node is part of an helix
150 // which is smaller in the template
151 nodesInSameHelix.add(leftNodeValue);
158 // If this node has children, add them in the stack
159 List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
160 int n = children.size();
162 int helixChildren = 0;
163 // For each subtree, we want the sequences (in RNA side) to be treated first
164 // and then the helix nodes, because finding an aligned sequence tells
165 // us we cannot grow a current helix
166 ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack1 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
167 ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack2 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
168 for (int i=0; i<n; i++) {
169 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child = children.get(i);
170 Tree<RNANodeValue2> RNAchild = child.getValue().getLeftNode();
172 && RNAchild.getValue() != null
173 && (canBePartOfAnHelix(RNAchild.getValue()) || canBePartOfABrokenHelix(RNAchild.getValue()) )) {
175 addToStack2.add(child);
177 addToStack1.add(child);
180 // We add the children in their reverse order so they
181 // are given in the original order by the iterator
182 for (int i=addToStack2.size()-1; i>=0; i--) {
183 remainingNodes.add(addToStack2.get(i));
185 for (int i=addToStack1.size()-1; i>=0; i--) {
186 remainingNodes.add(addToStack1.get(i));
188 if (helixChildren >= 2) {
189 // We cannot "grow" the current helix, because we have a multiloop
192 //nodesInSameHelix.clear();
199 // Now recover pseudoknots (broken helices)
201 // First create a temporary mapping with broken helices
202 LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
203 remainingNodes.add(alignment);
204 RNATemplateMapping tempPKMapping = new RNATemplateMapping();
205 while (!remainingNodes.isEmpty()) {
206 Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
207 remainingNodes.removeLast();
208 List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
209 int n = children.size();
211 for (int i=n-1; i>=0; i--) {
212 // We add the children in their reverse order so they
213 // are given in the original order by the iterator
214 remainingNodes.add(children.get(i));
216 List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
217 RNATemplateHelix currentHelix = null;
218 for (Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child: node.getChildren()) {
219 Tree<RNANodeValue2> leftNode = child.getValue().getLeftNode();
220 Tree<RNANodeValueTemplate> rightNode = child.getValue().getRightNode();
222 if (leftNode != null && leftNode.getValue() != null) {
223 RNANodeValue2 leftNodeValue = leftNode.getValue();
224 // We have a real left (RNA side) node
226 if (rightNode != null && rightNode.getValue() != null) {
227 // We have a real right (template side) node
228 RNANodeValueTemplate rightNodeValue = rightNode.getValue();
230 if (rightNodeValue instanceof RNANodeValueTemplateBrokenBasePair
231 && canBePartOfABrokenHelix(leftNodeValue)) {
232 RNATemplateHelix helix = ((RNANodeValueTemplateBrokenBasePair) rightNodeValue).getHelix();
233 currentHelix = helix;
234 tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), helix);
236 // Maybe we have marked nodes as part of the same helix
237 // when we didn't know yet which helix it was.
238 for (RNANodeValue2 v: nodesInSameHelix) {
239 tempPKMapping.addCouple(v.getNode().getLeftBasePosition(), helix);
241 nodesInSameHelix.clear();
244 nodesInSameHelix.clear();
247 // We have no right (template side) node
248 if (canBePartOfABrokenHelix(leftNodeValue)) {
249 if (currentHelix != null) {
250 // We may be in this case if the RNA sequence
251 // contains a longer helix than in the template
252 tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), currentHelix);
254 // Maybe this left node is part of an helix
255 // which is smaller in the template
256 nodesInSameHelix.add(leftNodeValue);
260 nodesInSameHelix.clear();
265 nodesInSameHelix.clear();
272 // As parts of broken helices were aligned independently,
273 // we need to check for consistency, ie. keep only bases for
274 // which the associated base is also aligned with the same helix.
275 for (RNATemplateElement element: tempPKMapping.getTargetElemsAsSet()) {
276 RNATemplateHelix helix = (RNATemplateHelix) element;
277 HashSet<Integer> basesInHelix = new HashSet<Integer>(tempPKMapping.getAncestor(helix));
278 for (int baseIndex: basesInHelix) {
279 System.out.println("PK: " + helix + " aligned with " + baseIndex);
280 boolean baseOK = false;
281 // Search for an associated base aligned with the same helix
282 ArrayList<ModeleBP> auxBasePairs = rna.getAuxBPs(baseIndex);
283 for (ModeleBP auxBasePair: auxBasePairs) {
284 int partner5 = ((ModeleBaseNucleotide) auxBasePair.getPartner5()).getIndex();
285 int partner3 = ((ModeleBaseNucleotide) auxBasePair.getPartner3()).getIndex();
286 if (baseIndex == partner5) {
287 if (basesInHelix.contains(partner3)) {
291 } else if (baseIndex == partner3) {
292 if (basesInHelix.contains(partner5)) {
299 // Add it to the real mapping
300 mapping.addCouple(baseIndex, helix);
312 public static void printMapping(RNATemplateMapping mapping, RNATemplate template, String sequence) {
313 Iterator<RNATemplateElement> iter = template.rnaIterator();
314 while (iter.hasNext()) {
315 RNATemplateElement element = iter.next();
316 System.out.println(element.toString());
317 ArrayList<Integer> A = mapping.getAncestor(element);
319 RNATemplateAlign.printIntArrayList(A);
320 for (int n=A.size(), i=0; i<n; i++) {
321 System.out.print("\t" + sequence.charAt(A.get(i)));
323 System.out.println("");
325 System.out.println("\tno match");
332 * Align the given RNA with the given RNA template.
334 public static TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
336 Tree<RNANodeValue2> rnaAsTree = RNATree2.RNATree2FromRNA(rna);
337 Tree<RNANodeValueTemplate> templateAsTree = template.toTree();
338 TreeAlign<RNANodeValue2,RNANodeValueTemplate> treeAlign = new TreeAlign<RNANodeValue2,RNANodeValueTemplate>(new RNANodeValue2TemplateDistance());
339 TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> result = treeAlign.align(rnaAsTree, templateAsTree);
341 } catch (RNATree2Exception e) {
342 throw (new RNATemplateDrawingAlgorithmException("RNATree2Exception: " + e.getMessage()));
343 } catch (ExceptionInvalidRNATemplate e) {
344 throw (new RNATemplateDrawingAlgorithmException("ExceptionInvalidRNATemplate: " + e.getMessage()));
345 } catch (TreeAlignException e) {
346 throw (new RNATemplateDrawingAlgorithmException("TreeAlignException: " + e.getMessage()));
351 * Map an RNA with an RNATemplate using tree alignment.
353 public static RNATemplateMapping mapRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
355 TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult = RNATemplateAlign.alignRNAWithTemplate(rna, template);
356 RNATemplateMapping mapping = RNATemplateAlign.makeTemplateMapping(alignResult, rna);
358 } catch (RNATemplateMappingException e) {
360 throw (new RNATemplateDrawingAlgorithmException("RNATemplateMappingException: " + e.getMessage()));
367 * Print an integer array.
369 public static void printIntArray(int[] A) {
370 for (int i=0; i<A.length; i++) {
371 System.out.print("\t" + A[i]);
373 System.out.println("");
377 * Print an integer ArrayList.
379 public static void printIntArrayList(ArrayList<Integer> A) {
380 for (int i=0; i<A.size(); i++) {
381 System.out.print("\t" + A.get(i));
383 System.out.println("");
387 * Print an matrix of shorts.
389 public static void printShortMatrix(short[][] M) {
390 System.out.println("Begin matrix");
391 for (int i=0; i<M.length; i++) {
392 for (int j=0; j<M[i].length; j++) {
393 System.out.print("\t" + M[i][j]);
395 System.out.println("");
397 System.out.println("End matrix");
401 * Convert a list of integers into an array of integers.
402 * The returned arrays is freshly allocated.
403 * Returns null if given null.
405 public static int[] intArrayFromList(List<Integer> l) {
408 int[] result = new int[n];
409 for (int i=0; i<n; i++) {
410 result[i] = l.get(i);