From: jprocter Date: Fri, 7 Dec 2012 15:10:22 +0000 (+0000) Subject: consensus secondary structure algorithm (possibly came from VARNA) X-Git-Tag: Jalview_2_9~221^2^2~8^2~29 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=e9d858816eee0dc27848b450f634605a2de0ba0d;p=jalview.git consensus secondary structure algorithm (possibly came from VARNA) --- diff --git a/src/jalview/analysis/SecStrConsensus.java b/src/jalview/analysis/SecStrConsensus.java new file mode 100644 index 0000000..27c3f7e --- /dev/null +++ b/src/jalview/analysis/SecStrConsensus.java @@ -0,0 +1,219 @@ +package jalview.analysis; + +import java.util.ArrayList; +import java.util.Hashtable; + + +public class SecStrConsensus { + + + /** + * Internal class to represent a simple base-pair. + * @author Yawn + * [JBPNote: ^^ is that Anne Menard or Ya(w)nn Ponty, I wonder ! ] + */ + public static class SimpleBP{ + int bp5; + int bp3; + + public SimpleBP() + { + + } + public SimpleBP(int i5, int i3) + { + bp5=i5; + bp3=i3; + } + public void setBP5(int i5) + { + bp5=i5; + } + + public void setBP3(int i3) + { + bp3=i3; + } + + public int getBP5() + { + return bp5; + } + + public int getBP3() + { + return bp3; + } + + public String toString() + { + return "("+bp5+","+bp3+")"; + } + + } + + public static int[] extractConsensus(ArrayList> bps) + { + // We do not currently know the length of the alignment + // => Estimate it as the biggest index of a base-pair plus one. + int maxlength = 0; + for (ArrayList strs : bps) + { + for (SimpleBP bp : strs) + { + + maxlength = Math.max(1+Math.max(bp.bp5, bp.bp3), maxlength); + + } + } + // Now we have a good estimate for length, allocate and initialize data + // to be fed to the dynamic programming procedure. + ArrayList> seq = new ArrayList>(); + for (int i=0;i()); } + for (ArrayList strs : bps) + { + for (SimpleBP bp : strs) + { + int i = bp.bp5; + int j = bp.bp3; + Hashtable h = seq.get(i); + if (!h.containsKey(j)) + { + h.put(j, 0.0); + } + h.put(j, h.get(j)+1.); + } + } + // At this point, seq contains, at each position i, a hashtable which associates, + // to each possible end j, the number of time a base-pair (i,j) occurs in the alignment + + // We can now run the dynamic programming procedure on this data + double[][] mat = fillMatrix(seq); + ArrayList res = backtrack(mat,seq); + + // Convert it to an array, ie finalres[i] = j >= 0 iff a base-pair (i,j) is present + // in the consensus, or -1 otherwise + int[] finalres = new int[seq.size()]; + for (int i=0;i> seq, int i, int k) + { + return seq.get(i).containsKey(k); + } + + // Returns the score of a potential base-pair, ie the number of structures in which it is found. + private static double basePairScore(ArrayList> seq, int i, int k) + { + return seq.get(i).get(k); + } + + + private static double[][] fillMatrix(ArrayList> seq) + { + int n = seq.size(); + double[][] tab = new double[n][n]; + for(int m=1;m<=n;m++) + { + for(int i=0;ii+1) + { + fact1 = tab[i+1][k-1]; + } + double fact2 = 0; + if (k backtrack(double[][] tab,ArrayList> seq) + { + return backtrack(tab,seq,0,seq.size()-1); + } + + private static ArrayList backtrack(double[][] tab,ArrayList> seq, int i, int j) + { + ArrayList result = new ArrayList(); + if (i indices = new ArrayList(); + indices.add(-1); + for (int k=i+1;k<=j;k++) + { + indices.add(k); + } + for (int k : indices) + { + if (k==-1) + { + if (tab[i][j] == tab[i+1][j]) + { + result = backtrack(tab, seq, i+1,j); + } + } + else + { + if (canBasePair(seq,i,k)) + { + double fact1 = 0; + if (k>i+1) + { + fact1 = tab[i+1][k-1]; + } + double fact2 = 0; + if (k