2 * Copyright (c) 2011 Peter Troshin JAva Bioinformatics Analysis Web Services
\r
3 * (JABAWS) @version: 2.0 This library is free software; you can redistribute it
\r
4 * and/or modify it under the terms of the Apache License version 2 as published
\r
5 * by the Apache Software Foundation This library is distributed in the hope
\r
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
\r
7 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
8 * Apache License for more details. A copy of the license is in
\r
9 * apache_license.txt. It is also available here:
\r
10 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or
\r
11 * derived work distributed in source code form must include this copyright and
\r
15 package compbio.runner.disorder;
\r
17 import java.io.File;
\r
18 import java.io.FileInputStream;
\r
19 import java.io.FileNotFoundException;
\r
20 import java.io.IOException;
\r
21 import java.io.InputStream;
\r
22 import java.util.Map;
\r
23 import java.util.Set;
\r
25 import org.apache.log4j.Logger;
\r
27 import compbio.data.sequence.Score;
\r
28 import compbio.data.sequence.SequenceUtil;
\r
29 import compbio.data.sequence.UnknownFileFormatException;
\r
30 import compbio.engine.client.Executable;
\r
31 import compbio.engine.client.PipedExecutable;
\r
32 import compbio.engine.client.SkeletalExecutable;
\r
33 import compbio.metadata.Limit;
\r
34 import compbio.metadata.LimitsManager;
\r
35 import compbio.metadata.ResultNotAvailableException;
\r
36 import compbio.runner.Util;
\r
39 * DisEMBL.py smooth_frame peak_frame join_frame fold_coils fold_hotloops
\r
40 * fold_rem465 sequence_file print
\r
42 * 'A default run would be: ./DisEMBL.py 8 8 4 1.2 1.4 1.2 fasta_file > out'
\r
44 * This version of DisEMBL is 1.4 (latest available for download in Feb 2011)
\r
45 * capable of outputting raw values
\r
47 * The values of the parameters are hard coded in DisEMBL.py script.
\r
48 * smooth_frame=8 peak_frame=8 join_frame=4 fold_coils=1.2 fold_hotloops=1.4
\r
51 * Changing these values is not recommended by the developers, apart from the smoothing
\r
52 * window. However, a change of 5 orders of magnitude in this parameter does not
\r
53 * change the output so allowing this change also seems pointless. Finally, the
\r
54 * binary DisEMBL depends on - Tisean - is not happy with arbitrary changes to
\r
55 * these values, so changing them can lead to problems.
\r
58 * This is not a standard DisEMBL! The script has been modified!
\r
61 public class Disembl extends SkeletalExecutable<Disembl>
\r
63 PipedExecutable<Disembl> {
\r
65 private static Logger log = Logger.getLogger(Disembl.class);
\r
67 // Cache for Limits information
\r
68 private static LimitsManager<Disembl> limits;
\r
70 public static final String KEY_VALUE_SEPARATOR = Util.SPACE;
\r
73 * For the region to be considered disordered the values must exceed these
\r
75 public final double COILS_EXPECTATION_THRESHOLD = 0.43;
\r
76 public final double REM_EXPECTATION_THRESHOLD = 0.5;
\r
77 public final double LOOPS_EXPECTATION_THRESHOLD = 0.086;
\r
79 /* The parameter list there must not contain same values! */
\r
81 // remove default input to prevent it from appearing in the parameters list
\r
82 // that could happen if the parameters are set first
\r
83 // super.setInput("");
\r
86 @SuppressWarnings("unchecked")
\r
87 public Map<String, Set<Score>> getResults(String workDirectory)
\r
88 throws ResultNotAvailableException {
\r
90 InputStream inStream = null;
\r
91 Map<String, Set<Score>> results = null;
\r
92 // How about getting ranges?
\r
94 inStream = new FileInputStream(new File(workDirectory, getOutput()));
\r
95 results = SequenceUtil.removeSequences(SequenceUtil
\r
96 .readDisembl(inStream));
\r
98 } catch (FileNotFoundException e) {
\r
99 log.error(e.getMessage(), e.getCause());
\r
100 throw new ResultNotAvailableException(e);
\r
101 } catch (IOException e) {
\r
102 log.error(e.getMessage(), e.getCause());
\r
103 throw new ResultNotAvailableException(e);
\r
104 } catch (UnknownFileFormatException e) {
\r
105 log.error(e.getMessage(), e.getCause());
\r
106 throw new ResultNotAvailableException(e);
\r
107 } catch (NullPointerException e) {
\r
108 log.error(e.getMessage(), e.getCause());
\r
109 throw new ResultNotAvailableException(e);
\r
116 public Disembl setInput(String inFile) {
\r
117 super.setInput(inFile);
\r
118 cbuilder.setLast(inFile);
\r
123 public Limit<Disembl> getLimit(String presetName) {
\r
124 if (limits == null) {
\r
125 limits = getLimits();
\r
128 Limit<Disembl> limit = null;
\r
129 if (limits != null) {
\r
130 // this returns default limit if preset is undefined!
\r
131 limit = limits.getLimitByName(presetName);
\r
133 // If limit is not defined for a particular preset, then return default
\r
135 if (limit == null) {
\r
136 log.debug("Limit for the preset " + presetName
\r
137 + " is not found. Using default");
\r
138 limit = limits.getDefaultLimit();
\r
144 public LimitsManager<Disembl> getLimits() {
\r
145 // synchronise on static field
\r
146 synchronized (log) {
\r
147 if (limits == null) {
\r
148 limits = Util.getLimits(this.getClass());
\r
155 public Class<? extends Executable<?>> getType() {
\r
156 return this.getClass();
\r