Functions for parsing free text properties on sequences, alignments and annotations.
author: jprocter <Jim Procter>
Wed, 25 Apr 2007 11:52:32 +0000 (11:52 +0000)
committer: jprocter <Jim Procter>
Wed, 25 Apr 2007 11:52:32 +0000 (11:52 +0000)
src/jalview/analysis/ParseProperties.java [new file with mode: 0644]

diff --git a/src/jalview/analysis/ParseProperties.java b/src/jalview/analysis/ParseProperties.java
new file mode 100644 (file)
index 0000000..015fcfe
--- /dev/null
@@ -0,0 +1,116 @@
+package jalview.analysis;\r
+\r
+import com.stevesoft.pat.Regex;\r
+\r
+import jalview.datamodel.*;\r
+\r
+/**
+ * Methods for parsing free text properties on alignments and sequences.
+ */
+public class ParseProperties
+{
+  /**
+   * The alignment being operated on
+   */
+  private final AlignmentI al;
+
+  /**
+   * Initialise a new property parser.
+   *
+   * @param al alignment that supplies the default set of sequences and that
+   *          receives every score annotation created by this parser
+   */
+  public ParseProperties(AlignmentI al)
+  {
+    this.al = al;
+  }
+
+  /**
+   * Extract scores from the description of every sequence in the alignment,
+   * using a single label and description for the parsed field(s).
+   *
+   * @param ScoreName label for the first numeric field in the regex match
+   * @param ScoreDescriptions description for the first numeric field
+   * @param regex Regular Expression string for passing to
+   *          <code>new com.stevesoft.pat.Regex(regex)</code>
+   * @return number of sequences that had at least one score added
+   */
+  public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex)
+  {
+    return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions }, regex);
+  }
+
+  /**
+   * Extract scores from the description of every sequence in the alignment.
+   *
+   * @param ScoreNames labels for each numeric field in regex match
+   * @param ScoreDescriptions description for each numeric field in regex match
+   * @param regex Regular Expression string for passing to
+   *          <code>new com.stevesoft.pat.Regex(regex)</code>
+   * @return number of sequences that had at least one score added
+   */
+  public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex)
+  {
+    return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex);
+  }
+
+  /**
+   * Extract scores for sequences by applying regex to description string.
+   * Each capture group of the regex is treated as one numeric field; a field
+   * that did not take part in the match or that does not parse as a float is
+   * skipped. Every parsed score becomes a new annotation that is added both to
+   * the sequence and to the alignment given at construction.
+   *
+   * @param seqs sequences to extract annotation from. Sequences without a
+   *          description are skipped.
+   * @param ScoreNames labels for each numeric field in regex match. If fewer
+   *          labels than capture groups are given, the last label is reused
+   *          with a "_&lt;column&gt;" suffix.
+   * @param ScoreDescriptions description for each numeric field in regex
+   *          match. If fewer descriptions than capture groups are given, the
+   *          last one (or a default built from the regex) is reused with a
+   *          " (column &lt;column&gt;)" suffix.
+   * @param regex Regular Expression string for passing to
+   *          <code>new com.stevesoft.pat.Regex(regex)</code>
+   * @return number of sequences that had at least one score added
+   * @throws IllegalArgumentException if the regex has capture groups but no
+   *           score name was supplied to label them
+   */
+  public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex)
+  {
+    Regex pattern = new Regex(regex);
+    int fields = pattern.numSubs();
+    // Make sure there is a label and a description for every capture group.
+    String[] names = padNames(ScoreNames, fields);
+    String[] descriptions = padDescriptions(ScoreDescriptions, fields, regex);
+
+    int count = 0;
+    for (int i = 0; i < seqs.length; i++)
+    {
+      String descr = seqs[i].getDescription();
+      if (descr == null || !pattern.search(descr))
+      {
+        continue;
+      }
+      boolean added = false;
+      for (int cols = 0; cols < fields; cols++)
+      {
+        // Sub-pattern 0 is the whole match, so capture groups start at 1.
+        String sstring = pattern.stringMatched(cols + 1);
+        if (sstring == null)
+        {
+          // this group did not take part in the match
+          continue;
+        }
+        float score;
+        try
+        {
+          score = Float.parseFloat(sstring);
+        }
+        catch (NumberFormatException e)
+        {
+          // don't try very hard to parse if regex was wrong.
+          continue;
+        }
+        // add score to sequence annotation.
+        AlignmentAnnotation an = new AlignmentAnnotation(names[cols], descriptions[cols], null);
+        an.setScore(score);
+        seqs[i].addAlignmentAnnotation(an);
+        al.addAnnotation(an);
+        added = true;
+      }
+      if (added)
+      {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  /**
+   * Return an array holding at least <code>fields</code> score labels,
+   * deriving any missing label from the last one supplied.
+   */
+  private static String[] padNames(String[] given, int fields)
+  {
+    int supplied = (given == null) ? 0 : given.length;
+    if (supplied >= fields)
+    {
+      return given;
+    }
+    if (supplied == 0)
+    {
+      throw new IllegalArgumentException("At least one score name is needed to label "
+              + fields + " regex field(s)");
+    }
+    String[] padded = new String[fields];
+    System.arraycopy(given, 0, padded, 0, supplied);
+    String base = given[supplied - 1];
+    for (int i = supplied; i < fields; i++)
+    {
+      padded[i] = base + "_" + i;
+    }
+    return padded;
+  }
+
+  /**
+   * Return an array holding at least <code>fields</code> score descriptions,
+   * deriving any missing one from the last one supplied or from the regex.
+   */
+  private static String[] padDescriptions(String[] given, int fields, String regex)
+  {
+    int supplied = (given == null) ? 0 : given.length;
+    if (supplied >= fields)
+    {
+      return given;
+    }
+    String base = (supplied > 0) ? given[supplied - 1] : null;
+    if (base == null)
+    {
+      base = "Score parsed from (" + regex + ")";
+    }
+    String[] padded = new String[fields];
+    if (supplied > 0)
+    {
+      System.arraycopy(given, 0, padded, 0, supplied);
+    }
+    for (int i = supplied; i < fields; i++)
+    {
+      padded[i] = base + " (column " + i + ")";
+    }
+    return padded;
+  }
+
+  /**
+   * Small manual check: builds four placeholder sequences with different
+   * descriptions and prints how many of them yielded a score for two regexes.
+   *
+   * @param argv ignored
+   */
+  public static void main(String argv[])
+  {
+    SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),
+            new Sequence("sq2","THISISAPLACEHOLDER"),
+            new Sequence("sq3","THISISAPLACEHOLDER"),
+            new Sequence("sq4","THISISAPLACEHOLDER")};
+    seqs[0].setDescription("1 mydescription1");
+    seqs[1].setDescription("mydescription2");
+    seqs[2].setDescription("2. 0.1 mydescription3");
+    seqs[3].setDescription("3 0.01 mydescription4");
+    Alignment al = new Alignment(seqs);
+    ParseProperties pp = new ParseProperties(al);
+    String regex = ".*([-0-9.+]+).*";
+    System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
+    regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
+    System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
+  }
+}