X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FParseProperties.java;h=d7e2eb50cbe93e355ea5764e069c5f22f592cbb8;hb=c19d2a91ca05e052e3408bf5852d88eb5d0608f1;hp=015fcfe760095d9bccecc3ef65628b5b09d64ea4;hpb=1c6e3aa5ffd0d3fbbfe5bffbdeaa9c4f54a61cd3;p=jalview.git diff --git a/src/jalview/analysis/ParseProperties.java b/src/jalview/analysis/ParseProperties.java index 015fcfe..d7e2eb5 100644 --- a/src/jalview/analysis/ParseProperties.java +++ b/src/jalview/analysis/ParseProperties.java @@ -1,116 +1,164 @@ -package jalview.analysis; - -import com.stevesoft.pat.Regex; - -import jalview.datamodel.*; - -public class ParseProperties -{ - /** - * Methods for parsing free text properties on alignments and sequences. - */ - /** - * The alignment being operated on - */ - private AlignmentI al=null; - - /** - * initialise a new property parser - * @param al - */ - ParseProperties(AlignmentI al) { - this.al = al; - } - - public int getScoresFromDescription(String ScoreName, String ScoreDescriptions, String regex) - { - return getScoresFromDescription(new String[] { ScoreName }, new String[] { ScoreDescriptions}, regex); - } - - public int getScoresFromDescription(String[] ScoreNames, String[] ScoreDescriptions, String regex) - { - return getScoresFromDescription(al.getSequencesArray(), ScoreNames, ScoreDescriptions, regex); - } - /** - * Extract scores for sequences by applying regex to description string. - * @param seqs seuqences to extract annotation from. - * @param ScoreNames labels for each numeric field in regex match - * @param ScoreDescriptions description for each numeric field in regex match - * @param regex Regular Expression string for passing to new com.stevesoft.patt.Regex(regex) - * @return total number of sequences that matched the regex - */ - public int getScoresFromDescription(SequenceI[] seqs, String[] ScoreNames, String[] ScoreDescriptions, String regex) - { - int count=0; - Regex pattern = new Regex(regex); - if (pattern.numSubs()>ScoreNames.length) - { - // Check that we have enough labels and descriptions for any parsed scores. - int onamelen = ScoreNames.length; - String[] tnames = new String[pattern.numSubs()+1]; - System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length); - String base = tnames[ScoreNames.length-1]; - ScoreNames = tnames; - String descrbase = ScoreDescriptions[ScoreNames.length-1]; - if (descrbase == null) - descrbase = "Score parsed from ("+regex+")"; - tnames = new String[pattern.numSubs()]; - System.arraycopy(ScoreDescriptions, 0, tnames, 0, ScoreDescriptions.length); - ScoreDescriptions = tnames; - for (int i=onamelen; i. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; + +import com.stevesoft.pat.Regex; + +public class ParseProperties +{ + /** + * Methods for parsing free text properties on alignments and sequences. There + * are a number of ways we might want to do this: arbitrary regex. and an + * associated score name for the number that's extracted. Regex that provides + * both score and name. + * + * We may also want to : - modify description to remove parsed numbers (this + * behaviour is dangerous since exporting the alignment would lose the + * original form then) - + * + */ + /** + * The alignment being operated on + */ + private AlignmentI al = null; + + /** + * initialise a new property parser + * + * @param al + */ + public ParseProperties(AlignmentI al) + { + this.al = al; + } + + public int getScoresFromDescription(String ScoreName, + String ScoreDescriptions, String regex, boolean repeat) + { + return getScoresFromDescription(new String[] { ScoreName }, + new String[] { ScoreDescriptions }, regex, repeat); + } + + public int getScoresFromDescription(String[] ScoreNames, + String[] ScoreDescriptions, String regex, boolean repeat) + { + return getScoresFromDescription(al.getSequencesArray(), ScoreNames, + ScoreDescriptions, regex, repeat); + } + + /** + * Extract scores for sequences by applying regex to description string. + * + * @param seqs + * seuqences to extract annotation from. + * @param ScoreNames + * labels for each numeric field in regex match + * @param ScoreDescriptions + * description for each numeric field in regex match + * @param regex + * Regular Expression string for passing to + * new com.stevesoft.patt.Regex(regex) + * @param repeat + * true means the regex will be applied multiple times along the + * description string of each sequence + * @return total number of sequences that matched the regex + */ + public int getScoresFromDescription(SequenceI[] seqs, + String[] ScoreNames, String[] ScoreDescriptions, String regex, + boolean repeat) + { + int count = 0; + Regex pattern = new Regex(regex); + if (pattern.numSubs() > ScoreNames.length) + { + // Check that we have enough labels and descriptions for any parsed + // scores. + int onamelen = ScoreNames.length; + String[] tnames = new String[pattern.numSubs() + 1]; + System.arraycopy(ScoreNames, 0, tnames, 0, ScoreNames.length); + String base = tnames[ScoreNames.length - 1]; + ScoreNames = tnames; + String descrbase = ScoreDescriptions[onamelen - 1]; + if (descrbase == null) + { + descrbase = "Score parsed from (" + regex + ")"; + } + tnames = new String[pattern.numSubs() + 1]; + System.arraycopy(ScoreDescriptions, 0, tnames, 0, + ScoreDescriptions.length); + ScoreDescriptions = tnames; + for (int i = onamelen; i < ScoreNames.length; i++) + { + ScoreNames[i] = base + "_" + i; + ScoreDescriptions[i] = descrbase + " (column " + i + ")"; + } + } + for (int i = 0; i < seqs.length; i++) + { + String descr = seqs[i].getDescription(); + if (descr == null) + { + continue; + } + int pos = 0; + boolean added = false; + int reps = 0; + while ((repeat || pos == 0) && pattern.searchFrom(descr, pos)) + { + pos = pattern.matchedTo(); + for (int cols = 0; cols < pattern.numSubs(); cols++) + { + String sstring = pattern.stringMatched(cols + 1); + double score = Double.NaN; + try + { + score = new Double(sstring).doubleValue(); + } catch (Exception e) + { + // don't try very hard to parse if regex was wrong. + continue; + } + // add score to sequence annotation. + AlignmentAnnotation an = new AlignmentAnnotation(ScoreNames[cols] + + ((reps > 0) ? "_" + reps : ""), + ScoreDescriptions[cols], null); + an.setScore(score); + System.out.println(seqs[i].getName() + " score: '" + + ScoreNames[cols] + "' = " + score); // DEBUG + an.setSequenceRef(seqs[i]); + seqs[i].addAlignmentAnnotation(an); + al.addAnnotation(an); + added = true; + } + reps++; // repeated matches + } + if (added) + { + count++; + } + } + return count; + } +}