X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=srcjar%2Ffr%2Forsay%2Flri%2Fvarna%2Fmodels%2Ftemplates%2FRNATemplateAlign.java;fp=srcjar%2Ffr%2Forsay%2Flri%2Fvarna%2Fmodels%2Ftemplates%2FRNATemplateAlign.java;h=358e280e39d02c85cb49d4ff472deca521ad1fb4;hb=2d6292c0377bc6b773c6844a45d3f2c5fac352c7;hp=0000000000000000000000000000000000000000;hpb=954af328a2a6a0055572cd1a09ee035301222574;p=jalview.git diff --git a/srcjar/fr/orsay/lri/varna/models/templates/RNATemplateAlign.java b/srcjar/fr/orsay/lri/varna/models/templates/RNATemplateAlign.java new file mode 100644 index 0000000..358e280 --- /dev/null +++ b/srcjar/fr/orsay/lri/varna/models/templates/RNATemplateAlign.java @@ -0,0 +1,418 @@ +package fr.orsay.lri.varna.models.templates; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import fr.orsay.lri.varna.exceptions.ExceptionInvalidRNATemplate; +import fr.orsay.lri.varna.models.rna.ModeleBaseNucleotide; +import fr.orsay.lri.varna.models.rna.ModeleBP; +import fr.orsay.lri.varna.models.rna.RNA; +import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateElement; +import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateHelix; +import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateUnpairedSequence; +import fr.orsay.lri.varna.models.treealign.AlignedNode; +import fr.orsay.lri.varna.models.treealign.RNANodeValue; +import fr.orsay.lri.varna.models.treealign.RNANodeValue2; +import fr.orsay.lri.varna.models.treealign.RNATree2; +import fr.orsay.lri.varna.models.treealign.RNATree2Exception; +import fr.orsay.lri.varna.models.treealign.Tree; +import fr.orsay.lri.varna.models.treealign.TreeAlign; +import fr.orsay.lri.varna.models.treealign.TreeAlignException; +import fr.orsay.lri.varna.models.treealign.TreeAlignResult; + +/** + * This class is about the alignment between a tree of RNANodeValue2 + * and a tree of RNANodeValueTemplate. + * + * @author Raphael Champeimont + */ +public class RNATemplateAlign { + + // We check this node can be part of a non-broken helix. + private static boolean canBePartOfAnHelix(RNANodeValue2 leftNodeValue) { + return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() > 0; + } + + private static boolean canBePartOfASequence(RNANodeValue2 leftNodeValue) { + return (leftNodeValue != null) && !leftNodeValue.isSingleNode(); + } + + private static boolean canBePartOfABrokenHelix(RNANodeValue2 leftNodeValue) { + return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() < 0; + } + + + /** + * This method takes an alignment between a tree of RNANodeValue2 + * of RNANodeValue and a tree of RNANodeValue2 of RNANodeValueTemplate, + * and the original RNA object that was used to create the first tree + * in the alignment. + * It returns the corresponding RNATemplateMapping. + */ + public static RNATemplateMapping makeTemplateMapping(TreeAlignResult alignResult, RNA rna) throws RNATemplateMappingException { + RNATemplateMapping mapping = new RNATemplateMapping(); + Tree> alignment = alignResult.getAlignment(); + mapping.setDistance(alignResult.getDistance()); + + // Map sequences and helices together, without managing pseudoknots + { + // We will go through the tree using a DFS + // The reason why this algorithm is not trivial is that we may have + // a longer helix on the RNA side than on the template side, in which + // case some nodes on the RNA side are going to be alone while we + // would want them to be part of the helix. + RNATemplateHelix currentHelix = null; + LinkedList>> remainingNodes = new LinkedList>>(); + List nodesInSameHelix = new LinkedList(); + remainingNodes.add(alignment); + while (!remainingNodes.isEmpty()) { + Tree> node = remainingNodes.getLast(); + remainingNodes.removeLast(); + + Tree leftNode = node.getValue().getLeftNode(); + Tree rightNode = node.getValue().getRightNode(); + + // Do we have something on RNA side? + if (leftNode != null && leftNode.getValue() != null) { + RNANodeValue2 leftNodeValue = leftNode.getValue(); + + // Do we have something on template side? + if (rightNode != null && rightNode.getValue() != null) { + RNANodeValueTemplate rightNodeValue = rightNode.getValue(); + + if (rightNodeValue instanceof RNANodeValueTemplateBasePair + && canBePartOfAnHelix(leftNodeValue)) { + RNATemplateHelix helix = ((RNANodeValueTemplateBasePair) rightNodeValue).getHelix(); + currentHelix = helix; + int i = leftNodeValue.getNode().getLeftBasePosition(); + int j = leftNodeValue.getNode().getRightBasePosition(); + mapping.addCouple(i, helix); + mapping.addCouple(j, helix); + + // Maybe we have marked nodes as part of the same helix + // when we didn't know yet which helix it was. + if (nodesInSameHelix.size() > 0) { + for (RNANodeValue2 v: nodesInSameHelix) { + int k = v.getNode().getLeftBasePosition(); + int l = v.getNode().getRightBasePosition(); + // We want to check nodesInSameHelix is a parent helix and not a sibling. + boolean validExtension = (k < i) && (j < l); + if (validExtension) { + mapping.addCouple(v.getNode().getLeftBasePosition(), helix); + mapping.addCouple(v.getNode().getRightBasePosition(), helix); + } + } + } + nodesInSameHelix.clear(); + + } else if (rightNodeValue instanceof RNANodeValueTemplateSequence + && canBePartOfASequence(leftNodeValue)) { + currentHelix = null; + nodesInSameHelix.clear(); + RNATemplateUnpairedSequence sequence = ((RNANodeValueTemplateSequence) rightNodeValue).getSequence(); + for (RNANodeValue nodeValue: leftNode.getValue().getNodes()) { + mapping.addCouple(nodeValue.getLeftBasePosition(), sequence); + } + } else { + // Pseudoknot in template + currentHelix = null; + nodesInSameHelix.clear(); + } + + } else { + // We have nothing on template side + + if (canBePartOfAnHelix(leftNodeValue)) { + if (currentHelix != null) { + // We may be in this case when the RNA + // contains a longer helix than in the template + int i = leftNodeValue.getNode().getLeftBasePosition(); + int j = leftNodeValue.getNode().getRightBasePosition(); + int k = Integer.MAX_VALUE; + int l = Integer.MIN_VALUE; + for (int b: mapping.getAncestor(currentHelix)) { + k = Math.min(k, b); + l = Math.max(l, b); + } + // We want to check currentHelix is a parent helix and not a sibling. + boolean validExtension = (k < i) && (j < l); + if (validExtension) { + mapping.addCouple(i, currentHelix); + mapping.addCouple(j, currentHelix); + } + } else { + // Maybe this left node is part of an helix + // which is smaller in the template + nodesInSameHelix.add(leftNodeValue); + } + } + } + } + + + // If this node has children, add them in the stack + List>> children = node.getChildren(); + int n = children.size(); + if (n > 0) { + int helixChildren = 0; + // For each subtree, we want the sequences (in RNA side) to be treated first + // and then the helix nodes, because finding an aligned sequence tells + // us we cannot grow a current helix + ArrayList>> addToStack1 = new ArrayList>>(); + ArrayList>> addToStack2 = new ArrayList>>(); + for (int i=0; i> child = children.get(i); + Tree RNAchild = child.getValue().getLeftNode(); + if (RNAchild != null + && RNAchild.getValue() != null + && (canBePartOfAnHelix(RNAchild.getValue()) || canBePartOfABrokenHelix(RNAchild.getValue()) )) { + helixChildren++; + addToStack2.add(child); + } else { + addToStack1.add(child); + } + } + // We add the children in their reverse order so they + // are given in the original order by the iterator + for (int i=addToStack2.size()-1; i>=0; i--) { + remainingNodes.add(addToStack2.get(i)); + } + for (int i=addToStack1.size()-1; i>=0; i--) { + remainingNodes.add(addToStack1.get(i)); + } + if (helixChildren >= 2) { + // We cannot "grow" the current helix, because we have a multiloop + // in the RNA. + currentHelix = null; + //nodesInSameHelix.clear(); + } + } + } + } + + + // Now recover pseudoknots (broken helices) + { + // First create a temporary mapping with broken helices + LinkedList>> remainingNodes = new LinkedList>>(); + remainingNodes.add(alignment); + RNATemplateMapping tempPKMapping = new RNATemplateMapping(); + while (!remainingNodes.isEmpty()) { + Tree> node = remainingNodes.getLast(); + remainingNodes.removeLast(); + List>> children = node.getChildren(); + int n = children.size(); + if (n > 0) { + for (int i=n-1; i>=0; i--) { + // We add the children in their reverse order so they + // are given in the original order by the iterator + remainingNodes.add(children.get(i)); + } + List nodesInSameHelix = new LinkedList(); + RNATemplateHelix currentHelix = null; + for (Tree> child: node.getChildren()) { + Tree leftNode = child.getValue().getLeftNode(); + Tree rightNode = child.getValue().getRightNode(); + + if (leftNode != null && leftNode.getValue() != null) { + RNANodeValue2 leftNodeValue = leftNode.getValue(); + // We have a real left (RNA side) node + + if (rightNode != null && rightNode.getValue() != null) { + // We have a real right (template side) node + RNANodeValueTemplate rightNodeValue = rightNode.getValue(); + + if (rightNodeValue instanceof RNANodeValueTemplateBrokenBasePair + && canBePartOfABrokenHelix(leftNodeValue)) { + RNATemplateHelix helix = ((RNANodeValueTemplateBrokenBasePair) rightNodeValue).getHelix(); + currentHelix = helix; + tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), helix); + + // Maybe we have marked nodes as part of the same helix + // when we didn't know yet which helix it was. + for (RNANodeValue2 v: nodesInSameHelix) { + tempPKMapping.addCouple(v.getNode().getLeftBasePosition(), helix); + } + nodesInSameHelix.clear(); + } else { + currentHelix = null; + nodesInSameHelix.clear(); + } + } else { + // We have no right (template side) node + if (canBePartOfABrokenHelix(leftNodeValue)) { + if (currentHelix != null) { + // We may be in this case if the RNA sequence + // contains a longer helix than in the template + tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), currentHelix); + } else { + // Maybe this left node is part of an helix + // which is smaller in the template + nodesInSameHelix.add(leftNodeValue); + } + } else { + currentHelix = null; + nodesInSameHelix.clear(); + } + } + } else { + currentHelix = null; + nodesInSameHelix.clear(); + } + } + } + } + + + // As parts of broken helices were aligned independently, + // we need to check for consistency, ie. keep only bases for + // which the associated base is also aligned with the same helix. + for (RNATemplateElement element: tempPKMapping.getTargetElemsAsSet()) { + RNATemplateHelix helix = (RNATemplateHelix) element; + HashSet basesInHelix = new HashSet(tempPKMapping.getAncestor(helix)); + for (int baseIndex: basesInHelix) { + System.out.println("PK: " + helix + " aligned with " + baseIndex); + boolean baseOK = false; + // Search for an associated base aligned with the same helix + ArrayList auxBasePairs = rna.getAuxBPs(baseIndex); + for (ModeleBP auxBasePair: auxBasePairs) { + int partner5 = ((ModeleBaseNucleotide) auxBasePair.getPartner5()).getIndex(); + int partner3 = ((ModeleBaseNucleotide) auxBasePair.getPartner3()).getIndex(); + if (baseIndex == partner5) { + if (basesInHelix.contains(partner3)) { + baseOK = true; + break; + } + } else if (baseIndex == partner3) { + if (basesInHelix.contains(partner5)) { + baseOK = true; + break; + } + } + } + if (baseOK) { + // Add it to the real mapping + mapping.addCouple(baseIndex, helix); + } + } + } + } + + + return mapping; + } + + + + public static void printMapping(RNATemplateMapping mapping, RNATemplate template, String sequence) { + Iterator iter = template.rnaIterator(); + while (iter.hasNext()) { + RNATemplateElement element = iter.next(); + System.out.println(element.toString()); + ArrayList A = mapping.getAncestor(element); + if (A != null) { + RNATemplateAlign.printIntArrayList(A); + for (int n=A.size(), i=0; i alignRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException { + try { + Tree rnaAsTree = RNATree2.RNATree2FromRNA(rna); + Tree templateAsTree = template.toTree(); + TreeAlign treeAlign = new TreeAlign(new RNANodeValue2TemplateDistance()); + TreeAlignResult result = treeAlign.align(rnaAsTree, templateAsTree); + return result; + } catch (RNATree2Exception e) { + throw (new RNATemplateDrawingAlgorithmException("RNATree2Exception: " + e.getMessage())); + } catch (ExceptionInvalidRNATemplate e) { + throw (new RNATemplateDrawingAlgorithmException("ExceptionInvalidRNATemplate: " + e.getMessage())); + } catch (TreeAlignException e) { + throw (new RNATemplateDrawingAlgorithmException("TreeAlignException: " + e.getMessage())); + } + } + + /** + * Map an RNA with an RNATemplate using tree alignment. + */ + public static RNATemplateMapping mapRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException { + try { + TreeAlignResult alignResult = RNATemplateAlign.alignRNAWithTemplate(rna, template); + RNATemplateMapping mapping = RNATemplateAlign.makeTemplateMapping(alignResult, rna); + return mapping; + } catch (RNATemplateMappingException e) { + e.printStackTrace(); + throw (new RNATemplateDrawingAlgorithmException("RNATemplateMappingException: " + e.getMessage())); + } + } + + + + /** + * Print an integer array. + */ + public static void printIntArray(int[] A) { + for (int i=0; i A) { + for (int i=0; i l) { + if (l != null) { + int n = l.size(); + int[] result = new int[n]; + for (int i=0; i