--- /dev/null
+package jalview.analysis;\r
+\r
+import com.stevesoft.pat.Regex;\r
+\r
+import jalview.datamodel.*;\r
+\r
+public class ParseProperties\r
+{\r
+ /**\r
+ * Methods for parsing free text properties on alignments and sequences.\r
+ */\r
+ /**\r
+ * The alignment being operated on\r
+ */\r
+ private AlignmentI al=null;\r
+ \r
+ /**\r
+ * initialise a new property parser\r
+ * @param al\r
+ */\r
+ ParseProperties(AlignmentI al) {\r
+ this.al = al;\r
+ }\r
+\r
+ public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex)\r
+ {\r
+ return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex);\r
+ }\r
+\r
+ public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex) \r
+ {\r
+ return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex);\r
+ }\r
+ /**\r
+ * Extract scores for sequences by applying regex to description string.\r
+ * @param seqs seuqences to extract annotation from.\r
+ * @param ScoreNames labels for each numeric field in regex match\r
+ * @param ScoreDescriptions description for each numeric field in regex match\r
+ * @param regex Regular Expression string for passing to <code>new com.stevesoft.patt.Regex(regex)</code>\r
+ * @return total number of sequences that matched the regex\r
+ */\r
+ public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex) \r
+ {\r
+ int count=0;\r
+ Regex pattern = new Regex(regex);\r
+ if (pattern.numSubs()>ScoreNames.length)\r
+ {\r
+ // Check that we have enough labels and descriptions for any parsed scores.\r
+ int onamelen = ScoreNames.length;\r
+ String[] tnames = new String[pattern.numSubs()+1];\r
+ System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length);\r
+ String base = tnames[ScoreNames.length-1];\r
+ ScoreNames = tnames;\r
+ String descrbase = ScoreDescriptions[ScoreNames.length-1];\r
+ if (descrbase == null)\r
+ descrbase = "Score parsed from ("+regex+")";\r
+ tnames = new String[pattern.numSubs()];\r
+ System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length);\r
+ ScoreDescriptions = tnames;\r
+ for (int i=onamelen; i<ScoreNames.length; i++)\r
+ {\r
+ ScoreNames[i] = base+"_"+i;\r
+ ScoreDescriptions[i] = descrbase+" (column "+i+")";\r
+ }\r
+ }\r
+ for (int i=0; i<seqs.length; i++)\r
+ {\r
+ String descr = seqs[i].getDescription();\r
+ if (pattern.search(descr))\r
+ {\r
+ boolean added=false;\r
+ for (int cols=0; cols<pattern.numSubs(); cols++)\r
+ {\r
+ String sstring = pattern.stringMatched(cols);\r
+ float score;\r
+ try {\r
+ score = new Float(sstring).floatValue();\r
+ }\r
+ catch (Exception e)\r
+ {\r
+ // don't try very hard to parse if regex was wrong.\r
+ continue;\r
+ }\r
+ // add score to sequence annotation.\r
+ AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols], ScoreDescriptions[cols], null);\r
+ an.setScore(score);\r
+ seqs[i].addAlignmentAnnotation(an);\r
+ al.addAnnotation(an);\r
+ added=true;\r
+ }\r
+ if (added)\r
+ count++;\r
+ }\r
+ \r
+ }\r
+ return count; \r
+ }\r
+ public static void main(String argv[]) {\r
+ SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),\r
+ new Sequence("sq2","THISISAPLACEHOLDER"),\r
+ new Sequence("sq3","THISISAPLACEHOLDER"),\r
+ new Sequence("sq4","THISISAPLACEHOLDER")};\r
+ seqs[0].setDescription("1 mydescription1");\r
+ seqs[1].setDescription("mydescription2");\r
+ seqs[2].setDescription("2. 0.1 mydescription3");\r
+ seqs[3].setDescription("3 0.01 mydescription4");\r
+ //seqs[4].setDescription("5 mydescription5");\r
+ Alignment al = new Alignment(seqs);\r
+ ParseProperties pp = new ParseProperties(al);\r
+ String regex = ".*([-0-9.+]+).*";\r
+ System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);\r
+ regex = ".*([-0-9.+]+).+([-0-9.+]+).*";\r
+ System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);\r
+ \r
+ }\r
+}\r