From: Daniel Barton Date: Fri, 30 Aug 2013 12:18:55 +0000 (+0100) Subject: JAL-1358 Cleaned up RNAalifoldClient.java. RNA helices colouring bug and parameters... X-Git-Tag: Jalview_2_9~200^2~21^2~17 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=a4afd02409c7c45ee4042bf81b310a537d75c73d;hp=f8164ef6f241c5eb111c413fb8594e67b49ce671;p=jalview.git JAL-1358 Cleaned up RNAalifoldClient.java. RNA helices colouring bug and parameters bug unsolved --- diff --git a/lib/min-jabaws-client-3.0.0.jar b/lib/min-jabaws-client-3.0.0.jar index 24d2165..531b8d1 100644 Binary files a/lib/min-jabaws-client-3.0.0.jar and b/lib/min-jabaws-client-3.0.0.jar differ diff --git a/src/jalview/ws/jws2/RNAalifoldClient.java b/src/jalview/ws/jws2/RNAalifoldClient.java index 090b2a4..6f4dc48 100644 --- a/src/jalview/ws/jws2/RNAalifoldClient.java +++ b/src/jalview/ws/jws2/RNAalifoldClient.java @@ -13,8 +13,9 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; +import java.util.LinkedHashMap; import java.util.TreeSet; +import java.util.regex.Pattern; import compbio.data.sequence.RNAStructReader.AlifoldResult; import compbio.data.sequence.RNAStructScoreManager; @@ -26,8 +27,6 @@ public class RNAalifoldClient extends JabawsAlignCalcWorker implements AlignCalcWorkerI { - // test - String methodName; AlignFrame af; @@ -41,18 +40,14 @@ public class RNAalifoldClient extends JabawsAlignCalcWorker implements super(sh, alignFrame, preset, paramset); if (arguments == null) arguments = new ArrayList(); - arguments.add(sh.getRunnerConfig().getArgumentByOptionName("-p")); af = alignFrame; methodName = sh.serviceType; - // defult false. Which one here? - // submitGaps = true; nucleotidesAllowed = true; proteinAllowed = false; - arguments.add(sh.getRunnerConfig().getArgumentByOptionName("-p")); } @Override @@ -76,97 +71,76 @@ public class RNAalifoldClient extends JabawsAlignCalcWorker implements List structs = ((RNAStructScoreManager) scoremanager).getStructs(); List> data = ((RNAStructScoreManager) scoremanager).getData(); + System.out.println("Length of RNAStructScoreManager is: " + structs.size()); + // test to see if this data object contains base pair contacts Score fscore = data.get(0).first(); this.bpScores = (fscore.getMethod().equals( AlifoldResult.contactProbabilities.toString())); - // Add annotations for the mfe Structure - if (bpScores) - createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(1), - data.get(0), data.get(1)); - else - createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(1), - data.get(1)); - // add annotation for the consensus sequence alignment - createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(0), null); - - // Not loop for the rest of the Annotations - if (structs.size() > 2) { - for (int i = 2; i < structs.size(); i++) { - // I can't think of a nice way of presenting the ensembleValues data - // so I wont for now. - if (!data.get(i).first().getMethod().equals( - AlifoldResult.ensembleValues.toString())) { - createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(i), - data.get(i)); - } - } + createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(0), + null, null); + + // Add annotations for the mfe Structure + createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(1), + data.get(1), null); + + // decide whether to add base pair contact probability histogram + int count = 2; + if (bpScores) { + createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(2), + data.get(0), data.get(2)); + count++; + } + + + // Now loop for the rest of the Annotations (if there it isn't stochastic output + // only the centroid and MEA structures remain anyway) + for (int i = count; i < structs.size(); i++) { + // The ensemble values should be displayed in the description of the + // first (or all?) Stochastic Backtrack Structures. + if (!data.get(i).first().getMethod().equals( + AlifoldResult.ensembleValues.toString())) { + + createAnnotationRowforScoreHolder(ourAnnot, getCalcId(), structs.get(i), + data.get(i), null); + } } - if (ourAnnot.size() > 0) { - - // Modify the visible annotation on the alignment viewport with the - // new alignment annotation rows created. + updateOurAnnots(ourAnnot); - // ap.adjustAnnotationHeight(); + ap.adjustAnnotationHeight(); } } } - // just for the base pair contact annotation. It uses a second score object. - protected void createAnnotationRowforScoreHolder( - List ourAnnot, String calcId, - String struct, TreeSet data, TreeSet descriptionData) { - - String typename = data.first().getMethod().toString(); - - AlignmentAnnotation annotation = alignViewport.getAlignment() - .findOrCreateAnnotation(typename, calcId, false, null, null); - - constructAnnotationFromContactProbabilities(annotation, struct, data); - - String description = constructAlignmentAnnotationDescription(descriptionData.first()); - if (description.length() == 0) description = typename; - annotation.description = description; - - // dan test - annotation.belowAlignment = false; - - annotation.validateRangeAndDisplay(); - - ourAnnot.add(annotation); - } - protected void createAnnotationRowforScoreHolder( List ourAnnot, String calcId, - String struct, TreeSet data) + String struct, TreeSet data, TreeSet descriptionData) { /* If contactProbability information is returned from RNAalifold it is stored * in the first TreeSet object corresponding to the String Id which * holds the consensus alignment. The method enumeration is then updated to - * AlifoldResult.contactProbabilties. This line (hack) recreates the same + * AlifoldResult.contactProbabilties. This line recreates the same * data object as was overwritten with the contact probabilites data. */ if (data == null) data = compbio.data.sequence.RNAStructReader .newEmptyScore(AlifoldResult.consensusAlignment); - String typename = data.first().getMethod().toString(); + if (descriptionData == null) descriptionData = data; + + String[] typenameAndDescription = constructTypenameAndDescription( + descriptionData.first()); + String typename = typenameAndDescription[0]; + String description = typenameAndDescription[1]; + AlignmentAnnotation annotation = alignViewport.getAlignment() .findOrCreateAnnotation(typename, calcId, false, null, null); - // construct annotation from ScoreHolder (unpacked into struct and data) - if (bpScores && data.first().getMethod().equals( - AlifoldResult.contactProbabilities.toString())) - constructAnnotationFromContactProbabilities(annotation, struct, data); - - else - // if bpScores is false the TreeSet data should always contain - // a single Score object - constructAnnotationFromStructureString(annotation, struct, data.first()); + constructAnnotationFromScoreHolder(annotation, struct, data); /* update annotation description with the free Energy, frequency in ensemble * or other data where appropriate. @@ -174,51 +148,94 @@ public class RNAalifoldClient extends JabawsAlignCalcWorker implements * Doesnt deal with AlifoldResult.ensembleValues, the free energy of ensemble * and frequency of mfe structure in ensemble. How to deal with these? */ - String description = constructAlignmentAnnotationDescription(data.first()); - if (description.length() == 0) description = typename; annotation.description = description; - // dan test annotation.belowAlignment = false; +// annotation.showAllColLabels = true; annotation.validateRangeAndDisplay(); ourAnnot.add(annotation); } - - - private AlignmentAnnotation constructAnnotationFromStructureString( - AlignmentAnnotation annotation, String struct, Score score) + + + private AlignmentAnnotation constructAnnotationFromScoreHolder( + AlignmentAnnotation annotation, String struct, TreeSet data) { - Annotation[] anns = new Annotation[struct.length()]; - - for (int i = 0; i < struct.length(); i++) { - anns[i] = new Annotation(struct.substring(i, i+1), "", - struct.charAt(i), Float.NaN); + + if (data != null && data.size() > 1 && data.first().getMethod().equals( + AlifoldResult.contactProbabilities.toString())) + { + + // The base pair probabilities are stored in a set in scoreholder. we want a map + LinkedHashMap basePairs = new LinkedHashMap(); + for (Score score : data) { + // The Score objects contain a set of size one containing the range and + // an ArrayList of size one containing the probabilty + basePairs.put(score.getRanges().first(), new Float(score.getScores().get(0))); + } + for (int i = 0; i < struct.length(); i++) { + + // Return all the contacts associated with position i + LinkedHashMap contacts = isContact(basePairs, i+1); + + String description = ""; + float prob = 0f; + + if (contacts.size() == 0) { + description = "No Data"; + } + else { + for (Range contact : contacts.keySet()) { + float t = contacts.get(contact); + if (t > prob) prob = t; + description += Integer.toString(contact.from) + "->" + + Integer.toString(contact.to) + ": " + Float.toString(t) + "% | "; + } + } + + anns[i] = new Annotation(struct.substring(i, i+1), description, + isSS(struct.charAt(i)), prob); + } + } + else if (data == null || data.size() == 1) { + for (int i = 0; i < struct.length(); i++) { + + anns[i] = new Annotation(struct.substring(i, i+1), "", + isSS(struct.charAt(i)), Float.NaN); + } + + annotation.graph = 0; // No graph } - annotation.graph = 0; // No graph annotation.annotations = anns; - return annotation; - } - - private String constructAlignmentAnnotationDescription(Score score) { + + private String[] constructTypenameAndDescription(Score score) { String description = ""; + String typename = ""; String datatype = score.getMethod(); - if (datatype.equals(AlifoldResult.mfeStructure.toString()) || - datatype.equals(AlifoldResult.centroidStructure.toString())) { - description = MessageFormat.format("Energy: {0} = {1} + {2}", + if (datatype.equals(AlifoldResult.mfeStructure.toString())) { + + description = MessageFormat.format("Minimum Free Energy Structure. Energy: {0} = {1} + {2}", score.getScores().get(0), score.getScores().get(1), score.getScores().get(2)); + typename = "MFE Structure"; } else if (datatype.equals(AlifoldResult.contactProbabilityStructure.toString())) { - description = MessageFormat.format("Energy: {0} Frequency: {1}", + description = MessageFormat.format("Base Pair Contact Probabilities. " + + "Energy of Ensemble: {0} Frequency of Ensemble: {1}", score.getScores().get(0), score.getScores().get(1)); + typename = "Contact Probabilities"; + } + else if (datatype.equals(AlifoldResult.centroidStructure.toString())) { + description = MessageFormat.format("Centroid Structure. Energy: {0} = {1} + {2}", + score.getScores().get(0), score.getScores().get(1), score.getScores().get(2)); + typename = "Centroid Structure"; } else if (datatype.equals(AlifoldResult.stochBTStructure.toString())) { if (score.getScores().size() > 0) { @@ -230,68 +247,42 @@ public class RNAalifoldClient extends JabawsAlignCalcWorker implements else if (datatype.equals(AlifoldResult.MEAStucture.toString())) { description = MessageFormat.format("Maximum Expected Accuracy Values: '{' {0} MEA={1} '}", score.getScores().get(0), score.getScores().get(1)); + typename = "MEA Structure"; } - - return description; - } - - - private AlignmentAnnotation constructAnnotationFromContactProbabilities( - AlignmentAnnotation annotation, String struct, TreeSet data) - { - Annotation[] anns = new Annotation[struct.length()]; - - TreeMap basePairs = null; - // The base pair probabilities are stored in a set in scoreholder. we want a map - basePairs = new TreeMap(); - for (Score score : data) { - // The Score objects contain a set of size one containing the range and - // an ArrayList of size one containing the probabilty - basePairs.put(score.getRanges().first(), new Float(score.getScores().get(0))); + else if (datatype.equals(AlifoldResult.consensusAlignment.toString())) { + typename = "RNAalifold Consensus"; + description = "Consensus Alignment Produced by RNAalifold"; } - for (int i = 0; i < struct.length(); i++) { - - // Return all the contacts associated with position i - List contacts = isContact(basePairs, i+1); - - if (contacts.size() == 0) { - anns[i] = new Annotation(struct.substring(i, i+1), "", struct.charAt(i), 0f); - } - else if (contacts.size() == 1) { - // There is only one contact associated with this base - float prob = basePairs.get(contacts.get(0)); - anns[i] = new Annotation(struct.substring(i, i+1), "", struct.charAt(i), prob); - } - else if (contacts.size() > 1) { - // For now we will simply deal with alternate contact information by mentioning its - // existance in the description - float prob = basePairs.get(contacts.get(0)); - anns[i] = new Annotation(struct.substring(i, i+1), "This base has alternate contacts", - struct.charAt(i), prob); - } + else { + typename = datatype; + description = typename; } - annotation.annotations = anns; - - return annotation; + return new String[] {typename, description}; } - // Check whether, at position i there is a base contact and return all the // contacts at this position. Should be in order of descending probability. - private List isContact(TreeMap basePairs, int i) { - - List contacts = new ArrayList(); + private LinkedHashMap isContact(LinkedHashMap basePairs, int i) + { + LinkedHashMap contacts = new LinkedHashMap(); for (Range contact : basePairs.keySet()) { // finds the contacts associtated with position i ordered by the natural // ordering of the Scores TreeSet in ScoreManager which is, descending probability - if (contact.from == i || contact.to == i) contacts.add(contact); + if (contact.from == i || contact.to == i) + contacts.put(contact, basePairs.get(contact)); } return contacts; } + private char isSS (char chr) { + String regex = "\\(|\\)|\\{|\\}|\\[|\\]"; + char ss = (Pattern.matches(regex, Character.toString(chr))) ? 'S': ' '; + return ss; + } public String getCalcId() { diff --git a/test/jalview/ws/jabaws/RNAStructExportImport.java b/test/jalview/ws/jabaws/RNAStructExportImport.java index 74a24aa..4e407af 100644 --- a/test/jalview/ws/jabaws/RNAStructExportImport.java +++ b/test/jalview/ws/jabaws/RNAStructExportImport.java @@ -46,8 +46,6 @@ public class RNAStructExportImport for (Jws2Instance svc : disc.getServices()) { - System.out.println("Service type: " + svc.serviceType); - if (svc.getServiceTypeURI().toLowerCase().contains("rnaalifoldws")) { rnaalifoldws = svc; @@ -60,8 +58,6 @@ public class RNAStructExportImport jalview.io.FileLoader fl = new jalview.io.FileLoader(false); - // Following this method a long way we find some (probably important!) - // code that I have just commented out! af = fl.LoadFileWaitTillLoaded(testseqs, jalview.io.FormatAdapter.FILE); assertNotNull("Couldn't load test data ('" + testseqs + "')", af); @@ -78,16 +74,13 @@ public class RNAStructExportImport } } - /** - * test for patches to JAL-1294 - */ @Test public void testRNAStructExport() { + + alifoldClient = new RNAalifoldClient(rnaalifoldws, af, null, null); - - System.out.println("START FOLDING"); af.getViewport().getCalcManager().startWorker(alifoldClient); @@ -102,37 +95,9 @@ public class RNAStructExportImport ; } while (af.getViewport().getCalcManager().isWorking()); - System.out.println("END FOLDING"); - - // ALL FOR TESTING - AlignCalcManagerI test = af.getViewport().getCalcManager(); - RNAalifoldClient testWorker = ((RNAalifoldClient)test.getRegisteredWorkersOfClass(RNAalifoldClient.class).get(0)); - testWorker.updateResultAnnotation(true); - System.out.println("Annotation from RNAalifoldclient"); - for (Annotation ann : testWorker.ourAnnots.get(0).annotations) { - System.out.print(ann.toString()+"|"); - } - System.out.println(); - - // Why are the AlignViewport.alignment and the RNAalifoldClient alignment - // Annotations different AlignmentI orig_alig = af.getViewport().getAlignment(); - System.out.println("orig_alig has class: " + orig_alig.getClass()); - - // some time before here but after the RNAalifoldClient Update method - // the alignment annotation is replaced.... - - System.out.println("orig_alig annotation:\n"); - for (AlignmentAnnotation an : orig_alig.getAlignmentAnnotation()) { - for (Annotation ann : an.annotations) { - System.out.print(ann.toString()+"|"); - } - System.out.println(); - } - - testAnnotationFileIO("Testing RNAalifold Annotation IO", orig_alig); } @@ -141,11 +106,10 @@ public class RNAStructExportImport { try { - String aligfileout = new FormatAdapter().formatSequences("CLUSTAL", + // what format would be appropriate for RNAalifold annotations? + String aligfileout = new FormatAdapter().formatSequences("PFAM", al.getSequencesArray()); - // test -// System.out.println("aligfileout:\n" + aligfileout); String anfileout = new AnnotationFile().printAnnotations( al.getAlignmentAnnotation(), al.getGroups(), @@ -164,8 +128,9 @@ public class RNAStructExportImport System.out.println("Output annotation file:\n" + anfileout + "\n<