1 package jalview.ws.jws1;
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentAnnotation;
5 import jalview.datamodel.AlignmentI;
6 import jalview.datamodel.AlignmentView;
7 import jalview.datamodel.ColumnSelection;
8 import jalview.datamodel.SequenceI;
9 import jalview.io.FileParse;
10 import jalview.io.FormatAdapter;
11 import jalview.util.MessageManager;
13 import java.io.IOException;
14 import java.util.Hashtable;
15 import java.util.List;
18 * extraction of processing routines to allow mocking
23 public class JPredWSUtils
26 * Process data extracted from service result set to generate a JPred result
32 * - original input alignment
34 * - character to use for reconstructing alignment used for
39 * - true if a prediction based on existing MSA
42 * @param result_PredFile
43 * @param result_Aligfile
44 * @param full_alignment
45 * @return { Alignment, ColumnSelection }
// Builds the { alignment, column selection } pair returned on the last line
// from a JPred prediction (result_PredFile) and, when msaPred is set and
// result_Aligfile is non-null, from the alignment associated with it.
// NOTE(review): several short lines (braces, the declarations of al, sqs,
// FirstSeq, msaIndex and gc, and some statement continuations) are not visible
// in this view; the comments below describe only the statements that are.
48 public static Object[] processJnetResult(AlignmentI currentView,
49 AlignmentView input, char gapChar, Hashtable SequenceInfo,
50 boolean msaPred, int[] predMap, String result_PredFile,
51 String result_Aligfile, FileParse full_alignment)
// Stays null unless one of the branches below assigns a column selection.
56 ColumnSelection alcsel = null;
58 // the position of the query sequence in Alignment al
61 // the position of the original sequence in the array of
62 // Sequences in the input object that this job holds a
66 // JPredFile prediction = new JPredFile("C:/JalviewX/files/jpred.txt",
// Parse the prediction from result_PredFile ("Paste" is passed as the second
// constructor argument) and extract its sequences.
68 jalview.io.JPredFile prediction = new jalview.io.JPredFile(
69 result_PredFile, "Paste");
70 SequenceI[] preds = prediction.getSeqsAsArray();
71 jalview.bin.Cache.log.debug("Got prediction profile.");
// MSA-based prediction for which an alignment was also returned.
73 if (msaPred && (result_Aligfile != null))
75 jalview.bin.Cache.log.debug("Getting associated alignment.");
76 // we ignore the returned alignment if we only predicted on a single
// Identify the format of the returned alignment before trying to use it.
78 String format = new jalview.io.IdentifyFile().identify(
79 result_Aligfile, "Paste");
81 if (jalview.io.FormatAdapter.isValidFormat(format))
// Take both the sequences and the column selection from the input view,
// using gapChar. NOTE(review): the condition selecting this path over the
// readFile path below is not visible here.
86 Object[] alandcolsel = input
87 .getAlignmentAndColumnSelection(gapChar);
88 sqs = (SequenceI[]) alandcolsel[0];
89 al = new Alignment(sqs);
90 alcsel = (ColumnSelection) alandcolsel[1];
// Alternative path: read the alignment from result_Aligfile and copy its
// sequences into sqs.
94 al = new FormatAdapter().readFile(result_Aligfile, "Paste",
96 sqs = new SequenceI[al.getHeight()];
98 for (int i = 0, j = al.getHeight(); i < j; i++)
100 sqs[i] = al.getSequenceAt(i);
// A false return from deuniquify(SequenceInfo, sqs) is reported as an Exception.
102 if (!jalview.analysis.SeqsetUtils.deuniquify(SequenceInfo, sqs))
104 throw (new Exception(
106 .getString("exception.couldnt_recover_sequence_properties_for_alignment")));
// Attach the current view's dataset to al when the view has one.
110 if (currentView.getDataset() != null)
112 al.setDataset(currentView.getDataset());
// Add the prediction annotation to al; the fourth argument is false here and
// true in the later call further down.
119 jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
120 FirstSeq, false, predMap);
// Raises an Exception naming the format and file; presumably the else branch
// of the isValidFormat check above - TODO confirm against the full file.
125 throw (new Exception(MessageManager.formatMessage(
126 "exception.unknown_format_for_file", new String[] { format,
127 result_Aligfile })));
132 AlignmentI fullAlignment = null;
135 // read full alignment if present.
136 if (!msaPred && full_alignment != null)
138 fullAlignment = new FormatAdapter().readFromFile(full_alignment,
// NOTE(review): the body of this IOException handler is not visible here.
141 } catch (IOException q)
146 if (fullAlignment != null)
// Build the alignment from the prediction's own sequences and record the
// query sequence position reported by the prediction.
153 al = new Alignment(preds);
154 FirstSeq = prediction.getQuerySeqPosition();
160 // map the prediction onto the query sequence, excluding positions
161 // corresponding to hidden regions in the original input.
163 SequenceI[] sqs = (SequenceI[]) input
164 .getAlignmentAndColumnSelection(gc)[0];
// Guard against msaIndex falling outside the sequences obtained from input.
165 if (msaIndex >= sqs.length)
169 .getString("error.implementation_error_invalid_msa_index_for_job"));
171 if (fullAlignment == null)
174 // Uses RemoveGapsCommand
// The command is constructed over the single query sequence and currentView;
// the visible code does not assign the resulting object to anything.
176 new jalview.commands.RemoveGapsCommand(
177 MessageManager.getString("label.remove_gaps"),
178 new SequenceI[] { sqs[msaIndex] }, currentView);
// Overwrite the sequence at FirstSeq in al with the query sequence string.
180 SequenceI profileseq = al.getSequenceAt(FirstSeq);
181 profileseq.setSequence(sqs[msaIndex].getSequenceAsString());
// A false return from SeqCharacterUnhash for the sequence at FirstSeq is
// reported as an Exception.
185 if (!jalview.analysis.SeqsetUtils.SeqCharacterUnhash(
186 al.getSequenceAt(FirstSeq), SequenceInfo))
188 throw (new Exception(
190 .getString("exception.couldnt_recover_sequence_props_for_jnet_query")));
194 if (currentView.getDataset() != null)
196 al.setDataset(currentView.getDataset());
203 if (fullAlignment != null)
205 // map gapMap from positions in visible sequence to positions in
212 jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
213 FirstSeq, true, predMap);
214 SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps.
// With no full alignment, realign the other sequences in al to profileseq.
215 if (fullAlignment == null)
217 alignToProfileSeq(al, profileseq);
219 if (fullAlignment == null && predMap != null)
221 // Adjust input view for gaps
222 // propagate insertions into profile
223 alcsel = ColumnSelection.propagateInsertions(profileseq, al,
229 // transfer to dataset
// Only annotations that reference a sequence are passed on; the calcId is
// "jalview.jws1.Jpred" with "MSA" appended when msaPred is true.
230 for (AlignmentAnnotation alant : al.getAlignmentAnnotation())
232 if (alant.sequenceRef != null)
234 replaceAnnotationOnAlignmentWith(alant, alant.label,
235 "jalview.jws1.Jpred" + (msaPred ? "MSA" : ""),
240 return new Object[] { al, alcsel }; // , FirstSeq, noMsa};
244 * copied from JabawsCalcWorker
// Stores a copy of newAnnot on the top-level dataset sequence reached from
// aSeq, after removing the annotations returned by the calcId lookup below.
// NOTE(review): the declaration of aSeq (presumably a trailing parameter) and
// the second argument of getAlignmentAnnotations are not visible in this view.
251 public static void replaceAnnotationOnAlignmentWith(
252 AlignmentAnnotation newAnnot, String typeName, String calcId,
// Follow the dataset-sequence chain until a sequence with no further dataset
// sequence is reached.
// NOTE(review): if aSeq.getDatasetSequence() returns null, the loop condition
// dereferences null - confirm callers always pass a sequence with a dataset.
255 SequenceI dsseq = aSeq.getDatasetSequence();
256 while (dsseq.getDatasetSequence() != null)
258 dsseq = dsseq.getDatasetSequence();
260 // look for same annotation on dataset and lift this one over
261 List<AlignmentAnnotation> dsan = dsseq.getAlignmentAnnotations(calcId,
// Remove every matching annotation already held by the dataset sequence.
263 if (dsan != null && dsan.size() > 0)
265 for (AlignmentAnnotation dssan : dsan)
267 dsseq.removeAlignmentAnnotation(dssan);
// Add a copy made with the AlignmentAnnotation copy constructor, then call
// adjustForAlignment() on that copy.
270 AlignmentAnnotation dssan = new AlignmentAnnotation(newAnnot);
271 dsseq.addAlignmentAnnotation(dssan);
272 dssan.adjustForAlignment();
276 * Given an alignment where all other sequences except profileseq are aligned
277 * to the ungapped profileseq, insert gaps in the other sequences to realign
278 * them with the residues in profileseq
// Realigns al against profileseq by delegating to insertGapsInto with the
// alignment's own gap character and the array returned by profileseq.gapMap().
283 public static void alignToProfileSeq(AlignmentI al, SequenceI profileseq)
285 char gc = al.getGapCharacter();
286 int[] gapMap = profileseq.gapMap();
287 insertGapsInto(al, gc, gapMap);
291 * Given an original sequence, and an alignment involving just the visible
292 * region insert gaps into the alignment and add in the missing residues from
293 * the original sequence
// Rebuilds the first sequence of al so that residues of origseq lying between
// consecutive predMap positions are included, calling insertGapsAt on al for
// each such stretch; the rebuilt string is written back with setSequence.
// NOTE(review): the declaration of alseq and the statements that append to it
// are not visible in this view - presumably a string accumulator; confirm.
299 public static void insertHiddenResidues(AlignmentI al, char gc,
300 int[] predMap, SequenceI origseq)
302 // orig: asdfPPPPPPPasdfPPPPasdf
304 // al: -----P-P-P---P---P----P---P-P--PP---P---
305 // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS
306 // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS
310 // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf
311 // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS....
312 // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS....
314 // iteration 0: add asdf, append -----P
315 // iteration 1: append -P
316 // iteration 2: append -P
317 // iteration 3: append ---P
318 // iteration 4: append ---P
319 // iteration 5: append ----P
320 // iteration 6: append ---P
321 // iteration 7: append -P
322 // iteration 8: append P
323 // iteration 9: append P
324 // iteration 10: append ---P
325 // tail: append: ---, add asdf
// The prediction sequence is taken to be the first sequence of al.
328 SequenceI predseq = al.getSequenceAt(0);
329 int predIdx = 0; // next column of prediction to preserve
330 // positions in original and prediction sequence
// lp starts one before origseq's start; predPos starts at predseq's start.
331 int lp = origseq.getStart() - 1, predPos = predseq.getStart();
332 for (int r = 0; r < predMap.length; r++)
334 // also need to keep track of trimmed prediction sequence numbering
// A step of more than one from lp to predMap[r] is handled as a stretch of
// origseq to insert.
335 if (predMap[r] - lp > 1)
337 // hidden region insert from origseq
338 String insert = origseq.getSequenceAsString(
339 origseq.findIndex(lp + 1) - 1,
340 origseq.findIndex(predMap[r]) - 1);
// Insert insert.length() gap characters at column alseq.length() in the
// sequences handled by insertGapsAt.
342 insertGapsAt(al, gc, alseq.length(), insert.length());
345 // Now update prediction sequence for next position.
347 int predIdxNext = predseq.findIndex(predPos); // everything up
// If findIndex does not move past predIdx, take the rest of predseq instead.
353 if (predIdxNext <= predIdx)
355 predIdxNext = predseq.getLength();
357 // just add in next segment of predseq
358 String predsert = predseq.getSequenceAsString(predIdx, predIdxNext);
360 predIdx = predIdxNext;
366 // add any remaining gaps
// Same segment extraction as inside the loop. NOTE(review): the enclosing
// condition for this repeat is not visible in this view.
368 int predIdxNext = predseq.findIndex(predPos); // everything up
374 if (predIdxNext <= predIdx)
376 predIdxNext = predseq.getLength();
378 // just add in next segment of predseq
379 String predsert = predseq.getSequenceAsString(predIdx, predIdxNext);
381 predIdx = predIdxNext;
// Trailing part of origseq beyond the last position recorded in lp.
384 if (lp < origseq.getEnd())
386 String insert = origseq.getSequenceAsString(
387 origseq.findIndex(lp + 1) - 1, origseq.getLength());
388 insertGapsAt(al, gc, alseq.length(), insert.length());
391 // then add in origseq data.
// Replace the prediction sequence's content with the rebuilt string.
392 predseq.setSequence(alseq);
// Walks gapMap and, wherever an entry exceeds lp by more than one, calls
// insertGapsAt with column gapMap[r] and a gap count of gapMap[r] - lp.
// NOTE(review): no update of lp is visible in this view - presumably it is
// advanced to gapMap[r] on a line not shown; confirm against the full file.
395 public static void insertGapsInto(AlignmentI al, char gc, int[] gapMap)
397 // insert gaps into profile
398 for (int lp = 0, r = 0; r < gapMap.length; r++)
400 if (gapMap[r] - lp > 1)
402 insertGapsAt(al, gc, gapMap[r], gapMap[r] - lp);
// Modifies the sequences of al from index 1 onwards (index 0 is skipped by
// the second loop): i is used as a column index into each sequence string and
// lp as the iteration count of the first loop.
// NOTE(review): the first loop's body and the condition choosing between the
// padding path and the substring path are not visible in this view.
408 private static void insertGapsAt(AlignmentI al, char gc, int i, int lp)
411 StringBuffer sb = new StringBuffer();
// Runs lp times; presumably appends gc to sb on each pass - TODO confirm.
412 for (int s = 0, ns = lp; s < ns; s++)
416 for (int s = 1, ns = al.getHeight(); s < ns; s++)
418 String sq = al.getSequenceAt(s).getSequenceAsString();
// Positive when the sequence string is shorter than column i.
419 int diff = i - sq.length();
// Repeats while the sequence string is still shorter than i, using either the
// whole of sb or a substring of it on each pass.
424 while ((diff = i - sq.length()) > 0)
427 + ((diff >= sb.length()) ? sb.toString() : sb.substring(
430 al.getSequenceAt(s).setSequence(sq);
// Otherwise splice the contents of sb into the sequence string at column i.
434 al.getSequenceAt(s).setSequence(
435 sq.substring(0, i) + sb.toString() + sq.substring(i));