1 package jalview.analysis;
3 import com.stevesoft.pat.Regex;
5 import jalview.datamodel.*;
// Extracts numeric scores from sequence description strings by means of a
// regular expression and records each score as an annotation on the sequence
// and on the alignment (see getScoresFromDescription).
7 public class ParseProperties
10 * Methods for parsing free text properties on alignments and sequences.
11 * There are a number of ways we might want to do this:
12 * an arbitrary regex, with an associated score name for the number that is extracted.
13 * a regex that provides both the score and its name.
15 * We may also want to :
16 * - modify description to remove parsed numbers (this behaviour is dangerous since exporting the alignment would lose the original form then)
21 * The alignment being operated on
// Used in two ways by the methods of this class: it supplies the default set
// of sequences (al.getSequencesArray()) and it receives every annotation
// created while parsing (al.addAnnotation(...)).
23 private AlignmentI al=null;
26 * initialise a new property parser
// NOTE(review): the constructor body is not visible in this listing (the
// embedded numbering jumps from 29 to 33). Presumably it stores the given
// alignment in the 'al' field - confirm against the full source.
29 public ParseProperties(AlignmentI al) {
33 public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex, boolean repeat)
35 return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex, repeat);
38 public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex, boolean repeat)
40 return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex, repeat);
43 * Extract scores for sequences by applying regex to description string.
44 * @param seqs sequences to extract annotation from.
45 * @param ScoreNames labels for each numeric field in regex match
46 * @param ScoreDescriptions description for each numeric field in regex match
47 * @param regex Regular Expression string for passing to <code>new com.stevesoft.pat.Regex(regex)</code>
48 * @param repeat true means the regex will be applied multiple times along the description string of each sequence
49 * @return total number of sequences that matched the regex
// Core implementation: compiles 'regex', searches the description of each
// sequence in 'seqs', parses every capture group of a match as a double and
// creates one AlignmentAnnotation per group, attached to the sequence and to
// the alignment 'al'.
// NOTE(review): the embedded line numbers skip several original lines (for
// example 52-53, 62, 78-82, 90, 100, 105-115), so some statements - including
// the declarations of 'pos' and 'reps' and the return - are not visible here.
51 public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex, boolean repeat)
54 Regex pattern = new Regex(regex);
// More capture groups than supplied labels: grow the label/description arrays
// and synthesise the missing entries from the last supplied ones.
55 if (pattern.numSubs()>ScoreNames.length)
57 // Check that we have enough labels and descriptions for any parsed scores.
58 int onamelen = ScoreNames.length;
59 String[] tnames = new String[pattern.numSubs()+1];
60 System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length);
// The last caller-supplied name becomes the prefix for generated names.
// NOTE(review): an empty ScoreNames array makes this index -1.
61 String base = tnames[ScoreNames.length-1];
// NOTE(review): indexes ScoreDescriptions with the length of ScoreNames, so it
// assumes ScoreDescriptions has at least as many entries as ScoreNames.
63 String descrbase = ScoreDescriptions[onamelen-1];
64 if (descrbase == null)
65 descrbase = "Score parsed from ("+regex+")";
66 tnames = new String[pattern.numSubs()+1];
// NOTE(review): the copy assumes ScoreDescriptions.length <= numSubs()+1.
67 System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length);
68 ScoreDescriptions = tnames;
// Fill the newly added slots with "<base>_<i>" and "<descrbase> (column <i>)".
// NOTE(review): this loop only does work if ScoreNames already refers to the
// enlarged array; that reassignment is not visible in this listing (original
// line 62 is skipped) - confirm.
69 for (int i=onamelen; i<ScoreNames.length; i++)
71 ScoreNames[i] = base+"_"+i;
72 ScoreDescriptions[i] = descrbase+" (column "+i+")";
// Process each sequence's description in turn.
75 for (int i=0; i<seqs.length; i++)
77 String descr = seqs[i].getDescription();
// When 'repeat' is false, the pos==0 test is what stops the loop after the
// first match has moved pos forward.
// NOTE(review): if a match can leave pos unchanged (e.g. a zero-length match),
// this condition would not become false - confirm against the semantics of
// Regex.searchFrom()/matchedTo().
83 while ((repeat || pos==0) && pattern.searchFrom(descr, pos))
// Continue the next search from where this match ended.
85 pos = pattern.matchedTo();
// Capture groups are addressed 1-based (cols+1); labels/descriptions 0-based.
86 for (int cols=0; cols<pattern.numSubs(); cols++)
88 String sstring = pattern.stringMatched(cols+1);
89 double score=Double.NaN;
// NOTE(review): the Double(String) constructor is deprecated in current JDKs;
// Double.parseDouble(sstring) yields the same value.
91 score = new Double(sstring).doubleValue();
95 // don't try very hard to parse if regex was wrong.
98 // add score to sequence annotation.
// The label gets a "_<reps>" suffix for every match after the first one in the
// same description.
99 AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols] +((reps>0) ? "_"+reps : ""), ScoreDescriptions[cols], null);
// NOTE(review): debug output is written to stdout for every parsed score.
101 System.out.println("Score: "+ScoreNames[cols]+"="+score); // DEBUG
// Link the annotation to its sequence and register it on both the sequence
// and the alignment.
102 an.setSequenceRef(seqs[i]);
103 seqs[i].addAlignmentAnnotation(an);
104 al.addAnnotation(an);
107 reps++; // repeated matches
116 public static void main(String argv[]) {
117 SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),
118 new Sequence("sq2","THISISAPLACEHOLDER"),
119 new Sequence("sq3","THISISAPLACEHOLDER"),
120 new Sequence("sq4","THISISAPLACEHOLDER")};
121 seqs[0].setDescription("1 mydescription1");
122 seqs[1].setDescription("mydescription2");
123 seqs[2].setDescription("2. 0.1 mydescription3");
124 seqs[3].setDescription("3 0.01 mydescription4");
125 //seqs[4].setDescription("5 mydescription5");
126 Alignment al = new Alignment(seqs);
127 ParseProperties pp = new ParseProperties(al);
128 String regex = ".*([-0-9.+]+)";
129 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex, true)+" for "+regex);
130 regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
131 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex, true)+" for "+regex);
132 System.out.println("Finished.");