/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
 * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
19 package jalview.analysis;
21 import com.stevesoft.pat.Regex;
23 import jalview.datamodel.*;
25 public class ParseProperties
28 * Methods for parsing free text properties on alignments and sequences.
29 * There are a number of ways we might want to do this:
30 * arbitrary regex. and an associated score name for the number that's extracted.
31 * Regex that provides both score and name.
33 * We may also want to :
34 * - modify description to remove parsed numbers (this behaviour is dangerous since exporting the alignment would lose the original form then)
39 * The alignment being operated on
41 private AlignmentI al=null;
44 * initialise a new property parser
47 public ParseProperties(AlignmentI al) {
51 public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex, boolean repeat)
53 return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex, repeat);
56 public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex, boolean repeat)
58 return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex, repeat);
61 * Extract scores for sequences by applying regex to description string.
62 * @param seqs seuqences to extract annotation from.
63 * @param ScoreNames labels for each numeric field in regex match
64 * @param ScoreDescriptions description for each numeric field in regex match
65 * @param regex Regular Expression string for passing to <code>new com.stevesoft.patt.Regex(regex)</code>
66 * @param repeat true means the regex will be applied multiple times along the description string of each sequence
67 * @return total number of sequences that matched the regex
69 public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex, boolean repeat)
72 Regex pattern = new Regex(regex);
73 if (pattern.numSubs()>ScoreNames.length)
75 // Check that we have enough labels and descriptions for any parsed scores.
76 int onamelen = ScoreNames.length;
77 String[] tnames = new String[pattern.numSubs()+1];
78 System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length);
79 String base = tnames[ScoreNames.length-1];
81 String descrbase = ScoreDescriptions[onamelen-1];
82 if (descrbase == null)
83 descrbase = "Score parsed from ("+regex+")";
84 tnames = new String[pattern.numSubs()+1];
85 System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length);
86 ScoreDescriptions = tnames;
87 for (int i=onamelen; i<ScoreNames.length; i++)
89 ScoreNames[i] = base+"_"+i;
90 ScoreDescriptions[i] = descrbase+" (column "+i+")";
93 for (int i=0; i<seqs.length; i++)
95 String descr = seqs[i].getDescription();
101 while ((repeat || pos==0) && pattern.searchFrom(descr, pos))
103 pos = pattern.matchedTo();
104 for (int cols=0; cols<pattern.numSubs(); cols++)
106 String sstring = pattern.stringMatched(cols+1);
107 double score=Double.NaN;
109 score = new Double(sstring).doubleValue();
113 // don't try very hard to parse if regex was wrong.
116 // add score to sequence annotation.
117 AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols] +((reps>0) ? "_"+reps : ""), ScoreDescriptions[cols], null);
119 System.out.println("Score: "+ScoreNames[cols]+"="+score); // DEBUG
120 an.setSequenceRef(seqs[i]);
121 seqs[i].addAlignmentAnnotation(an);
122 al.addAnnotation(an);
125 reps++; // repeated matches
134 public static void main(String argv[]) {
135 SequenceI[] seqs = new SequenceI[] { new Sequence("sq1","THISISAPLACEHOLDER"),
136 new Sequence("sq2","THISISAPLACEHOLDER"),
137 new Sequence("sq3","THISISAPLACEHOLDER"),
138 new Sequence("sq4","THISISAPLACEHOLDER")};
139 seqs[0].setDescription("1 mydescription1");
140 seqs[1].setDescription("mydescription2");
141 seqs[2].setDescription("2. 0.1 mydescription3");
142 seqs[3].setDescription("3 0.01 mydescription4");
143 //seqs[4].setDescription("5 mydescription5");
144 Alignment al = new Alignment(seqs);
145 ParseProperties pp = new ParseProperties(al);
146 String regex = ".*([-0-9.+]+)";
147 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex, true)+" for "+regex);
148 regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
149 System.out.println("Matched "+pp.getScoresFromDescription("my Score", "my Score Description",regex, true)+" for "+regex);
150 System.out.println("Finished.");