7b74e2a7b3b104e671f4b7ec417676107deed1db
[jalview.git] / src / jalview / ws / jws1 / JPredWSUtils.java
1 package jalview.ws.jws1;
2
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentAnnotation;
5 import jalview.datamodel.AlignmentI;
6 import jalview.datamodel.AlignmentView;
7 import jalview.datamodel.ColumnSelection;
8 import jalview.datamodel.SequenceI;
9 import jalview.io.FileParse;
10 import jalview.io.FormatAdapter;
11 import jalview.util.MessageManager;
12
13 import java.util.Hashtable;
14 import java.util.List;
15
16
17 /**
18  * extraction of processing routines to allow mocking
19  * 
20  * @author jprocter
21  *
22  */
23 public class JPredWSUtils
24 {
25   /**
26    * Process data extracted from service result set to generate a JPred result
27    * view.
28    * 
29    * @param currentView
30    * 
31    * @param input
32    *          - original input alignment
33    * @param gapChar
34    *          - character to use for reconstructing alignment used for
35    *          prediction
36    * @param SequenceInfo
37    *          - from SeqHash
38    * @param msaPred
39    *          - true if a prediction based on existing MSA
40    * @param predMap
41    *          - position
42    * @param result_PredFile
43    * @param result_Aligfile
44    * @param full_alignment
45    * @return { Alignment, ColumnSelection }
46    * @throws Exception
47    */
48   public static Object[] processJnetResult(AlignmentI currentView,
49           AlignmentView input,
50           char gapChar, Hashtable SequenceInfo,
51           boolean msaPred, int[] predMap, String result_PredFile,
52           String result_Aligfile, FileParse full_alignment)
53           throws Exception
54   {
55
56     AlignmentI al = null;
57     ColumnSelection alcsel = null;
58
59     // the position of the query sequence in Alignment al
60     int FirstSeq = -1;
61
62     // the position of the original sequence in the array of
63     // Sequences in the input object that this job holds a
64     // prediction for
65     int msaIndex = 0;
66
67     // JPredFile prediction = new JPredFile("C:/JalviewX/files/jpred.txt",
68     // "File");
69     jalview.io.JPredFile prediction = new jalview.io.JPredFile(
70             result_PredFile, "Paste");
71     SequenceI[] preds = prediction.getSeqsAsArray();
72     jalview.bin.Cache.log.debug("Got prediction profile.");
73
74     if (msaPred && (result_Aligfile != null))
75     {
76       jalview.bin.Cache.log.debug("Getting associated alignment.");
77       // we ignore the returned alignment if we only predicted on a single
78       // sequence
79       String format = new jalview.io.IdentifyFile().identify(
80               result_Aligfile, "Paste");
81
82       if (jalview.io.FormatAdapter.isValidFormat(format))
83       {
84         SequenceI sqs[];
85         if (predMap != null)
86         {
87           Object[] alandcolsel = input
88                   .getAlignmentAndColumnSelection(gapChar);
89           sqs = (SequenceI[]) alandcolsel[0];
90           al = new Alignment(sqs);
91           alcsel = (ColumnSelection) alandcolsel[1];
92         }
93         else
94         {
95           al = new FormatAdapter().readFile(result_Aligfile, "Paste",
96                   format);
97           sqs = new SequenceI[al.getHeight()];
98
99           for (int i = 0, j = al.getHeight(); i < j; i++)
100           {
101             sqs[i] = al.getSequenceAt(i);
102           }
103           if (!jalview.analysis.SeqsetUtils.deuniquify(SequenceInfo, sqs))
104           {
105             throw (new Exception(
106                     MessageManager
107                             .getString("exception.couldnt_recover_sequence_properties_for_alignment")));
108           }
109         }
110         FirstSeq = 0;
111         if (currentView.getDataset() != null)
112         {
113           al.setDataset(currentView.getDataset());
114
115         }
116         else
117         {
118           al.setDataset(null);
119         }
120         jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
121                 FirstSeq, false, predMap);
122
123       }
124       else
125       {
126         throw (new Exception(MessageManager.formatMessage(
127                 "exception.unknown_format_for_file", new String[] { format,
128                     result_Aligfile })));
129       }
130     }
131     else
132     {
133       AlignmentI fullAlignment = null;
134       try
135       {
136         // read full alignment if present.
137         if (full_alignment != null)
138         {
139         fullAlignment = new FormatAdapter().readFromFile(full_alignment,
140                 "FASTA");
141         }
142       } catch (Exception q)
143       {
144
145       } finally
146       {
147         if (fullAlignment != null)
148         {
149           al = fullAlignment;
150           FirstSeq = 0;
151         }
152         else
153         {
154           al = new Alignment(preds);
155           FirstSeq = prediction.getQuerySeqPosition();
156         }
157       }
158
159       if (predMap != null)
160       {
161         // map the prediction onto the query sequence, excluding positions
162         // corresponding to hidden regions in the original input.
163         char gc = gapChar;
164         SequenceI[] sqs = (SequenceI[]) input
165                 .getAlignmentAndColumnSelection(gc)[0];
166         if (msaIndex >= sqs.length)
167         {
168           throw new Error(
169                   MessageManager
170                           .getString("error.implementation_error_invalid_msa_index_for_job"));
171         }
172         if (fullAlignment == null)
173         {
174           // //
175           // Uses RemoveGapsCommand
176           // //
177           new jalview.commands.RemoveGapsCommand(
178                   MessageManager.getString("label.remove_gaps"),
179                   new SequenceI[] { sqs[msaIndex] }, currentView);
180
181           SequenceI profileseq = al.getSequenceAt(FirstSeq);
182           profileseq.setSequence(sqs[msaIndex].getSequenceAsString());
183         }
184       }
185
186       if (!jalview.analysis.SeqsetUtils.SeqCharacterUnhash(
187               al.getSequenceAt(FirstSeq), SequenceInfo))
188       {
189         throw (new Exception(
190                 MessageManager
191                         .getString("exception.couldnt_recover_sequence_props_for_jnet_query")));
192       }
193       else
194       {
195         if (currentView.getDataset() != null)
196         {
197           al.setDataset(currentView.getDataset());
198
199         }
200         else
201         {
202           al.setDataset(null);
203         }
204         jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
205                 FirstSeq, true, predMap);
206         SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps.
207         if (fullAlignment == null)
208         {
209           alignToProfileSeq(al, profileseq);
210         }
211         if (fullAlignment == null && predMap != null)
212         {
213           // Adjust input view for gaps
214           // propagate insertions into profile
215           alcsel = ColumnSelection.propagateInsertions(profileseq, al,
216                   input);
217         }
218       }
219     }
220
221     // transfer to dataset
222     for (AlignmentAnnotation alant : al.getAlignmentAnnotation())
223     {
224       if (alant.sequenceRef != null)
225       {
226         replaceAnnotationOnAlignmentWith(alant, alant.label,
227                 "jalview.jws1.Jpred" + (msaPred ? "MSA" : ""),
228                 alant.sequenceRef);
229       }
230     }
231
232     return new Object[] { al, alcsel }; // , FirstSeq, noMsa};
233   }
234
235   /**
236    * copied from JabawsCalcWorker
237    * 
238    * @param newAnnot
239    * @param typeName
240    * @param calcId
241    * @param aSeq
242    */
243   public static void replaceAnnotationOnAlignmentWith(
244           AlignmentAnnotation newAnnot, String typeName, String calcId,
245           SequenceI aSeq)
246   {
247     SequenceI dsseq = aSeq.getDatasetSequence();
248     while (dsseq.getDatasetSequence() != null)
249     {
250       dsseq = dsseq.getDatasetSequence();
251     }
252     // look for same annotation on dataset and lift this one over
253     List<AlignmentAnnotation> dsan = dsseq.getAlignmentAnnotations(calcId,
254             typeName);
255     if (dsan != null && dsan.size() > 0)
256     {
257       for (AlignmentAnnotation dssan : dsan)
258       {
259         dsseq.removeAlignmentAnnotation(dssan);
260       }
261     }
262     AlignmentAnnotation dssan = new AlignmentAnnotation(newAnnot);
263     dsseq.addAlignmentAnnotation(dssan);
264     dssan.adjustForAlignment();
265   }
266
267   /**
268    * Given an alignment where all other sequences except profileseq are aligned
269    * to the ungapped profileseq, insert gaps in the other sequences to realign
270    * them with the residues in profileseq
271    * 
272    * @param al
273    * @param profileseq
274    */
275   public static void alignToProfileSeq(AlignmentI al, SequenceI profileseq)
276   {
277     char gc = al.getGapCharacter();
278     int[] gapMap = profileseq.gapMap();
279     // insert gaps into profile
280     for (int lp = 0, r = 0; r < gapMap.length; r++)
281     {
282       if (gapMap[r] - lp > 1)
283       {
284         StringBuffer sb = new StringBuffer();
285         for (int s = 0, ns = gapMap[r] - lp; s < ns; s++)
286         {
287           sb.append(gc);
288         }
289         for (int s = 1, ns = al.getHeight(); s < ns; s++)
290         {
291           String sq = al.getSequenceAt(s).getSequenceAsString();
292           int diff = gapMap[r] - sq.length();
293           if (diff > 0)
294           {
295             // pad gaps
296             sq = sq + sb;
297             while ((diff = gapMap[r] - sq.length()) > 0)
298             {
299               sq = sq
300                       + ((diff >= sb.length()) ? sb.toString() : sb
301                               .substring(0, diff));
302             }
303             al.getSequenceAt(s).setSequence(sq);
304           }
305           else
306           {
307             al.getSequenceAt(s).setSequence(
308                     sq.substring(0, gapMap[r]) + sb.toString()
309                             + sq.substring(gapMap[r]));
310           }
311         }
312       }
313       lp = gapMap[r];
314     }
315   }
316
317 }