1 package jalview.analysis;
\r
3 import com.stevesoft.pat.Regex;
\r
5 import jalview.datamodel.*;
\r
7 public class ParseProperties
\r
10 * Methods for parsing free text properties on alignments and sequences.
\r
13 * The alignment being operated on
\r
15 private AlignmentI al=null;
\r
18 * initialise a new property parser
\r
21 ParseProperties(AlignmentI al) {
\r
25 public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex)
\r
27 return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex);
\r
30 public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex)
\r
32 return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex);
\r
35 * Extract scores for sequences by applying regex to description string.
\r
36 * @param seqs sequences to extract annotation from.
\r
37 * @param ScoreNames labels for each numeric field in regex match
\r
38 * @param ScoreDescriptions description for each numeric field in regex match
\r
39 * @param regex Regular Expression string for passing to <code>new com.stevesoft.pat.Regex(regex)</code>
\r
40 * @return total number of sequences that matched the regex
\r
42 public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex)
\r
45 Regex pattern = new Regex(regex);
\r
46 if (pattern.numSubs()>ScoreNames.length)
\r
48 // Check that we have enough labels and descriptions for any parsed scores.
\r
49 int onamelen = ScoreNames.length;
\r
50 String[] tnames = new String[pattern.numSubs()+1];
\r
51 System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length);
\r
52 String base = tnames[ScoreNames.length-1];
\r
53 ScoreNames = tnames;
\r
54 String descrbase = ScoreDescriptions[ScoreNames.length-1];
\r
55 if (descrbase == null)
\r
56 descrbase = "Score parsed from ("+regex+")";
\r
57 tnames = new String[pattern.numSubs()];
\r
58 System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length);
\r
59 ScoreDescriptions = tnames;
\r
60 for (int i=onamelen; i<ScoreNames.length; i++)
\r
62 ScoreNames[i] = base+"_"+i;
\r
63 ScoreDescriptions[i] = descrbase+" (column "+i+")";
\r
66 for (int i=0; i<seqs.length; i++)
\r
68 String descr = seqs[i].getDescription();
\r
69 if (pattern.search(descr))
\r
71 boolean added=false;
\r
72 for (int cols=0; cols<pattern.numSubs(); cols++)
\r
74 String sstring = pattern.stringMatched(cols);
\r
77 score = new Float(sstring).floatValue();
\r
81 // don't try very hard to parse if regex was wrong.
\r
84 // add score to sequence annotation.
\r
85 AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols], ScoreDescriptions[cols], null);
\r
87 seqs[i].addAlignmentAnnotation(an);
\r
88 al.addAnnotation(an);
\r
98 public static void main(String argv[]) {
\r
99 SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),
\r
100 new Sequence("sq2","THISISAPLACEHOLDER"),
\r
101 new Sequence("sq3","THISISAPLACEHOLDER"),
\r
102 new Sequence("sq4","THISISAPLACEHOLDER")};
\r
103 seqs[0].setDescription("1 mydescription1");
\r
104 seqs[1].setDescription("mydescription2");
\r
105 seqs[2].setDescription("2. 0.1 mydescription3");
\r
106 seqs[3].setDescription("3 0.01 mydescription4");
\r
107 //seqs[4].setDescription("5 mydescription5");
\r
108 Alignment al = new Alignment(seqs);
\r
109 ParseProperties pp = new ParseProperties(al);
\r
110 String regex = ".*([-0-9.+]+).*";
\r
111 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
\r
112 regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
\r
113 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
\r