X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FParseProperties.java;h=3649a29a2f06757c4c05fc78434528c7fd776378;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=c23cb3a39588c493879466cc4a70c21bc8c1c407;hpb=9fe76dc8b685a5a48aeff77c7a9eaf862da3340e;p=jalview.git diff --git a/src/jalview/analysis/ParseProperties.java b/src/jalview/analysis/ParseProperties.java index c23cb3a..3649a29 100644 --- a/src/jalview/analysis/ParseProperties.java +++ b/src/jalview/analysis/ParseProperties.java @@ -1,126 +1,180 @@ -package jalview.analysis; - -import com.stevesoft.pat.Regex; - -import jalview.datamodel.*; - -public class ParseProperties -{ - /** - * Methods for parsing free text properties on alignments and sequences. - * There are a number of ways we might want to do this: - * arbitrary regex. and an associated score name for the number that's extracted. - * Regex that provides both score and name. - * - * We may also want to : - * - modify description to remove parsed numbers (this behaviour is dangerous since exporting the alignment would lose the original form then) - * - - * - */ - /** - * The alignment being operated on - */ - private AlignmentI al=null; - - /** - * initialise a new property parser - * @param al - */ - public ParseProperties(AlignmentI al) { - this.al = al; - } - - public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex) - { - return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex); - } - - public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex) - { - return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex); - } - /** - * Extract scores for sequences by applying regex to description string. - * @param seqs seuqences to extract annotation from. - * @param ScoreNames labels for each numeric field in regex match - * @param ScoreDescriptions description for each numeric field in regex match - * @param regex Regular Expression string for passing to new com.stevesoft.patt.Regex(regex) - * @return total number of sequences that matched the regex - */ - public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex) - { - int count=0; - Regex pattern = new Regex(regex); - if (pattern.numSubs()>ScoreNames.length) - { - // Check that we have enough labels and descriptions for any parsed scores. - int onamelen = ScoreNames.length; - String[] tnames = new String[pattern.numSubs()+1]; - System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length); - String base = tnames[ScoreNames.length-1]; - ScoreNames = tnames; - String descrbase = ScoreDescriptions[onamelen-1]; - if (descrbase == null) - descrbase = "Score parsed from ("+regex+")"; - tnames = new String[pattern.numSubs()+1]; - System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length); - ScoreDescriptions = tnames; - for (int i=onamelen; i. + */ +package jalview.analysis; + +import com.stevesoft.pat.Regex; + +import jalview.datamodel.*; + +public class ParseProperties +{ + /** + * Methods for parsing free text properties on alignments and sequences. There + * are a number of ways we might want to do this: arbitrary regex. and an + * associated score name for the number that's extracted. Regex that provides + * both score and name. + * + * We may also want to : - modify description to remove parsed numbers (this + * behaviour is dangerous since exporting the alignment would lose the + * original form then) - + * + */ + /** + * The alignment being operated on + */ + private AlignmentI al = null; + + /** + * initialise a new property parser + * + * @param al + */ + public ParseProperties(AlignmentI al) + { + this.al = al; + } + + public int getScoresFromDescription(String ScoreName, + String ScoreDescriptions, String regex, boolean repeat) + { + return getScoresFromDescription(new String[] + { ScoreName }, new String[] + { ScoreDescriptions }, regex, repeat); + } + + public int getScoresFromDescription(String[] ScoreNames, + String[] ScoreDescriptions, String regex, boolean repeat) + { + return getScoresFromDescription(al.getSequencesArray(), ScoreNames, + ScoreDescriptions, regex, repeat); + } + + /** + * Extract scores for sequences by applying regex to description string. + * + * @param seqs + * seuqences to extract annotation from. + * @param ScoreNames + * labels for each numeric field in regex match + * @param ScoreDescriptions + * description for each numeric field in regex match + * @param regex + * Regular Expression string for passing to + * new com.stevesoft.patt.Regex(regex) + * @param repeat + * true means the regex will be applied multiple times along the + * description string of each sequence + * @return total number of sequences that matched the regex + */ + public int getScoresFromDescription(SequenceI[] seqs, + String[] ScoreNames, String[] ScoreDescriptions, String regex, + boolean repeat) + { + int count = 0; + Regex pattern = new Regex(regex); + if (pattern.numSubs() > ScoreNames.length) + { + // Check that we have enough labels and descriptions for any parsed + // scores. + int onamelen = ScoreNames.length; + String[] tnames = new String[pattern.numSubs() + 1]; + System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length); + String base = tnames[ScoreNames.length - 1]; + ScoreNames = tnames; + String descrbase = ScoreDescriptions[onamelen - 1]; + if (descrbase == null) + descrbase = "Score parsed from (" + regex + ")"; + tnames = new String[pattern.numSubs() + 1]; + System.arraycopy(ScoreDescriptions, 0, tnames, 0, + ScoreDescriptions.length); + ScoreDescriptions = tnames; + for (int i = onamelen; i < ScoreNames.length; i++) + { + ScoreNames[i] = base + "_" + i; + ScoreDescriptions[i] = descrbase + " (column " + i + ")"; + } + } + for (int i = 0; i < seqs.length; i++) + { + String descr = seqs[i].getDescription(); + if (descr == null) + continue; + int pos = 0; + boolean added = false; + int reps = 0; + while ((repeat || pos == 0) && pattern.searchFrom(descr, pos)) + { + pos = pattern.matchedTo(); + for (int cols = 0; cols < pattern.numSubs(); cols++) + { + String sstring = pattern.stringMatched(cols + 1); + double score = Double.NaN; + try + { + score = new Double(sstring).doubleValue(); + } catch (Exception e) + { + // don't try very hard to parse if regex was wrong. + continue; + } + // add score to sequence annotation. + AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols] + + ((reps > 0) ? "_" + reps : ""), + ScoreDescriptions[cols], null); + an.setScore(score); + System.out.println("Score: " + ScoreNames[cols] + "=" + score); // DEBUG + an.setSequenceRef(seqs[i]); + seqs[i].addAlignmentAnnotation(an); + al.addAnnotation(an); + added = true; + } + reps++; // repeated matches + } + if (added) + { + count++; + } + } + return count; + } + + public static void main(String argv[]) + { + SequenceI[] seqs = new SequenceI[] + { new Sequence("sq1", "THISISAPLACEHOLDER"), + new Sequence("sq2", "THISISAPLACEHOLDER"), + new Sequence("sq3", "THISISAPLACEHOLDER"), + new Sequence("sq4", "THISISAPLACEHOLDER") }; + seqs[0].setDescription("1 mydescription1"); + seqs[1].setDescription("mydescription2"); + seqs[2].setDescription("2. 0.1 mydescription3"); + seqs[3].setDescription("3 0.01 mydescription4"); + // seqs[4].setDescription("5 mydescription5"); + Alignment al = new Alignment(seqs); + ParseProperties pp = new ParseProperties(al); + String regex = ".*([-0-9.+]+)"; + System.out.println("Matched " + + pp.getScoresFromDescription("my Score", + "my Score Description", regex, true) + " for " + regex); + regex = ".*([-0-9.+]+).+([-0-9.+]+).*"; + System.out.println("Matched " + + pp.getScoresFromDescription("my Score", + "my Score Description", regex, true) + " for " + regex); + System.out.println("Finished."); + } +}