1 package jalview.analysis;
\r
3 import com.stevesoft.pat.Regex;
\r
5 import jalview.datamodel.*;
\r
7 public class ParseProperties
\r
10 * Methods for parsing free text properties on alignments and sequences.
\r
11 * There are a number of ways we might want to do this:
\r
12 * An arbitrary regex, and an associated score name for the number that's extracted.
\r
13 * Regex that provides both score and name.
\r
15 * We may also want to :
\r
16 * - modify description to remove parsed numbers (this behaviour is dangerous since exporting the alignment would lose the original form then)
\r
21 * The alignment being operated on
\r
23 private AlignmentI al=null;
\r
26 * initialise a new property parser
\r
29 public ParseProperties(AlignmentI al) {
\r
33 public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex)
\r
35 return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex);
\r
38 public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex)
\r
40 return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex);
\r
43 * Extract scores for sequences by applying regex to description string.
\r
44 * @param seqs sequences to extract annotation from.
\r
45 * @param ScoreNames labels for each numeric field in regex match
\r
46 * @param ScoreDescriptions description for each numeric field in regex match
\r
47 * @param regex Regular Expression string for passing to <code>new com.stevesoft.pat.Regex(regex)</code>
\r
48 * @return total number of sequences that matched the regex
\r
50 public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex)
\r
53 Regex pattern = new Regex(regex);
\r
54 if (pattern.numSubs()>ScoreNames.length)
\r
56 // Check that we have enough labels and descriptions for any parsed scores.
\r
57 int onamelen = ScoreNames.length;
\r
58 String[] tnames = new String[pattern.numSubs()+1];
\r
59 System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length);
\r
60 String base = tnames[ScoreNames.length-1];
\r
61 ScoreNames = tnames;
\r
62 String descrbase = ScoreDescriptions[onamelen-1];
\r
63 if (descrbase == null)
\r
64 descrbase = "Score parsed from ("+regex+")";
\r
65 tnames = new String[pattern.numSubs()+1];
\r
66 System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length);
\r
67 ScoreDescriptions = tnames;
\r
68 for (int i=onamelen; i<ScoreNames.length; i++)
\r
70 ScoreNames[i] = base+"_"+i;
\r
71 ScoreDescriptions[i] = descrbase+" (column "+i+")";
\r
74 for (int i=0; i<seqs.length; i++)
\r
76 String descr = seqs[i].getDescription();
\r
79 if (pattern.search(descr))
\r
81 boolean added=false;
\r
82 for (int cols=0; cols<pattern.numSubs(); cols++)
\r
84 String sstring = pattern.stringMatched(cols+1);
\r
85 double score=Double.NaN;
\r
87 score = new Double(sstring).doubleValue();
\r
91 // don't try very hard to parse if regex was wrong.
\r
94 // add score to sequence annotation.
\r
95 AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols], ScoreDescriptions[cols], null);
\r
97 System.out.println("Score: "+ScoreNames[cols]+"="+score); // DEBUG
\r
98 an.setSequenceRef(seqs[i]);
\r
99 seqs[i].addAlignmentAnnotation(an);
\r
100 al.addAnnotation(an);
\r
110 public static void main(String argv[]) {
\r
111 SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),
\r
112 new Sequence("sq2","THISISAPLACEHOLDER"),
\r
113 new Sequence("sq3","THISISAPLACEHOLDER"),
\r
114 new Sequence("sq4","THISISAPLACEHOLDER")};
\r
115 seqs[0].setDescription("1 mydescription1");
\r
116 seqs[1].setDescription("mydescription2");
\r
117 seqs[2].setDescription("2. 0.1 mydescription3");
\r
118 seqs[3].setDescription("3 0.01 mydescription4");
\r
119 //seqs[4].setDescription("5 mydescription5");
\r
120 Alignment al = new Alignment(seqs);
\r
121 ParseProperties pp = new ParseProperties(al);
\r
122 String regex = ".*([-0-9.+]+)";
\r
123 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
\r
124 regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
\r
125 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex)+" for "+regex);
\r
126 System.out.println("Finished.");
\r