From: Jim Procter
Date: Mon, 2 Nov 2015 15:01:50 +0000 (+0000)
Subject: JAL-1950 structuring methods and note on parsing flat-files
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=ecdc8f7020e2fe1b510e03635801528a08480ec8;hp=b919fa3374d70132c50e4c87270461c1be795206;p=jalview.git

JAL-1950 structuring methods and note on parsing flat-files
---

diff --git a/src/jalview/ws/ebi/hmmerClient.java b/src/jalview/ws/ebi/hmmerClient.java
index 3db35be..c29d355 100644
--- a/src/jalview/ws/ebi/hmmerClient.java
+++ b/src/jalview/ws/ebi/hmmerClient.java
@@ -1,6 +1,14 @@
 package jalview.ws.ebi;
 
+import jalview.datamodel.AlignmentI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FileParse;
+import jalview.io.FormatAdapter;
+
+import java.io.BufferedReader;
 import java.io.File;
+import java.io.IOException;
+import java.util.StringTokenizer;
 import java.util.regex.Matcher;
 
 import org.apache.axis.transport.http.HTTPConstants;
@@ -219,4 +227,103 @@ public class hmmerClient
      * results......
      */
   }
+
+  /**
+   * Retrieve the alignment produced by a JackHmmer job and read its score
+   * table.
+   *
+   * The alignment is read as FASTA from the job's "score?format=afa" download
+   * and the score table from its "score?format=csv" download, both located
+   * under baseUrl + "/download/" + jobid.
+   *
+   * @param jobid
+   *          identifier of the job, used to build the download URLs
+   * @param dataset
+   *          currently unused; matching results up to it is still to be done
+   * @return the alignment read from the service
+   * @throws IOException
+   *           if the score table cannot be accessed or read
+   */
+  AlignmentI retrieveJackhmmerResult(String jobid, AlignmentI dataset)
+          throws OutOfMemoryError, IOException
+  {
+    String jobUrl = baseUrl + "/download/" + jobid;
+
+    // get results
+    AlignmentI searchResult = new AppletFormatAdapter().readFile(jobUrl
+            + "/score?format=afa&t=.gz", FormatAdapter.URL, "FASTA");
+
+    // TODO: match up to dataset.
+
+    // and do scores
+    FileParse csvsource = new FileParse(jobUrl + "/score?format=csv",
+            FormatAdapter.URL);
+    if (!csvsource.isValid())
+    {
+      throw new IOException("Couldn't access scores for Jackhmmer results");
+    }
+    readJackhmmerScores(searchResult, csvsource);
+    return searchResult;
+  }
+
+  /**
+   * Read the JackHmmer score table line by line.
+   *
+   * Work in progress: each line is split on tabs but the fields are not used
+   * yet, so searchResult is left untouched.
+   *
+   * @param searchResult
+   *          alignment the scores belong to (not modified yet)
+   * @param csvsource
+   *          source of the score table; its reader is closed before returning
+   * @throws IOException
+   *           if the table cannot be read
+   */
+  private void readJackhmmerScores(AlignmentI searchResult,
+          FileParse csvsource) throws IOException
+  {
+    BufferedReader rl = new BufferedReader(csvsource.getReader());
+    try
+    {
+      String line;
+      while ((line = rl.readLine()) != null)
+      {
+        // NOTE(review): splits on tabs although the URL asks for format=csv -
+        // confirm the delimiter against real output. Fields are not used yet.
+        StringTokenizer st = new StringTokenizer(line, "\t");
+      }
+    } finally
+    {
+      // release the reader (and the stream it wraps) even if reading fails
+      rl.close();
+    }
+
+    /*
+     * Design notes: each line scores a fragment, so for a combined score ?
+     *
+     * for a sequence q sort any t against q according to overallScore(q,t)
+     * maxFragment(q,t) in sequence features parlance: for alignment
+     * s.getFeature("overallScore",q) -> range on q and range on s
+     */
+
+    // Sample rows (records appear to be wrapped over several comment lines) from
+    // http://www.ebi.ac.uk/Tools/hmmer/download/60048B38-7CEC-11E5-A230-CED6D26C98AD.5/score?format=csv
+    //
+    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 1 2 4.4e-46 2.1e-43
+    // 151.758316040039 0.04 11 151 3 139 1 150 0.94 GROWTH FACTOR BOUND PROTEIN
+    // 2 1cj1_J 1gri_B
+    //
+    // 1gri_A 1gri_A 217 jackhmmer - 163 4.7e-62 212.4 0.1 2 2 1.6e-17 7.9e-15
+    // 58.8796501159668 0.01 7 66 157 215 153 216 0.95 GROWTH FACTOR BOUND
+    // PROTEIN 2 1cj1_J 1gri_B
+    //
+    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 1 2 7.5e-28 3.6e-25
+    // 92.4921493530273 0.00 65 161 20 122 17 124 0.95 Tyrosine-protein
+    // phosphatase non-receptor typ 4h1o_A
+    //
+    // 4h1o_A 4h1o_A 560 jackhmmer - 163 2.1e-57 197.3 0.0 2 2 7.6e-31 3.7e-28
+    // 102.219146728516 0.03 66 161 127 236 124 238 0.94 Tyrosine-protein
+    // phosphatase non-receptor typ 4h1o_A
+  }
+
 }