1 package jalview.ws.jws1;
3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentAnnotation;
5 import jalview.datamodel.AlignmentI;
6 import jalview.datamodel.AlignmentView;
7 import jalview.datamodel.ColumnSelection;
8 import jalview.datamodel.SequenceI;
9 import jalview.io.FileParse;
10 import jalview.io.FormatAdapter;
11 import jalview.util.MessageManager;
13 import java.io.IOException;
14 import java.util.Hashtable;
15 import java.util.List;
18 * extraction of processing routines to allow mocking
23 public class JPredWSUtils
26 * Process data extracted from service result set to generate a JPred result
32 * - original input alignment
34 * - character to use for reconstructing alignment used for
39 * - true if a prediction based on existing MSA
42 * @param result_PredFile
43 * @param result_Aligfile
44 * @param full_alignment
45 * @return { Alignment, ColumnSelection }
48 public static Object[] processJnetResult(AlignmentI currentView,
49 AlignmentView input, char gapChar, Hashtable SequenceInfo,
50 boolean msaPred, int[] predMap, String result_PredFile,
51 String result_Aligfile, FileParse full_alignment)
56 ColumnSelection alcsel = null;
58 // the position of the query sequence in Alignment al
61 // the position of the original sequence in the array of
62 // Sequences in the input object that this job holds a
66 // JPredFile prediction = new JPredFile("C:/JalviewX/files/jpred.txt",
68 jalview.io.JPredFile prediction = new jalview.io.JPredFile(
69 result_PredFile, "Paste");
70 SequenceI[] preds = prediction.getSeqsAsArray();
71 jalview.bin.Cache.log.debug("Got prediction profile.");
73 if (msaPred && (result_Aligfile != null))
75 jalview.bin.Cache.log.debug("Getting associated alignment.");
76 // we ignore the returned alignment if we only predicted on a single
78 String format = new jalview.io.IdentifyFile().identify(
79 result_Aligfile, "Paste");
81 if (jalview.io.FormatAdapter.isValidFormat(format))
86 Object[] alandcolsel = input
87 .getAlignmentAndColumnSelection(gapChar);
88 sqs = (SequenceI[]) alandcolsel[0];
89 al = new Alignment(sqs);
90 alcsel = (ColumnSelection) alandcolsel[1];
94 al = new FormatAdapter().readFile(result_Aligfile, "Paste",
96 sqs = new SequenceI[al.getHeight()];
98 for (int i = 0, j = al.getHeight(); i < j; i++)
100 sqs[i] = al.getSequenceAt(i);
102 if (!jalview.analysis.SeqsetUtils.deuniquify(SequenceInfo, sqs))
104 throw (new Exception(
106 .getString("exception.couldnt_recover_sequence_properties_for_alignment")));
110 if (currentView.getDataset() != null)
112 al.setDataset(currentView.getDataset());
119 jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
120 FirstSeq, false, predMap);
125 throw (new Exception(MessageManager.formatMessage(
126 "exception.unknown_format_for_file", new String[] { format,
127 result_Aligfile })));
132 AlignmentI fullAlignment = null;
135 // read full alignment if present.
136 if (!msaPred && full_alignment != null)
138 fullAlignment = new FormatAdapter().readFromFile(full_alignment,
141 } catch (IOException q)
146 if (fullAlignment != null)
153 al = new Alignment(preds);
154 FirstSeq = prediction.getQuerySeqPosition();
160 // map the prediction onto the query sequence, excluding positions
161 // corresponding to hidden regions in the original input.
163 SequenceI[] sqs = (SequenceI[]) input
164 .getAlignmentAndColumnSelection(gc)[0];
165 if (msaIndex >= sqs.length)
169 .getString("error.implementation_error_invalid_msa_index_for_job"));
172 // Uses RemoveGapsCommand
174 new jalview.commands.RemoveGapsCommand(
175 MessageManager.getString("label.remove_gaps"),
176 new SequenceI[] { sqs[msaIndex] }, currentView);
177 if (fullAlignment == null)
179 // just replace trimmed sequence in prediction profile with full
181 SequenceI profileseq = al.getSequenceAt(FirstSeq);
182 profileseq.setSequence(sqs[msaIndex].getSequenceAsString());
186 insertHiddenResidues(al, '.', predMap, sqs[msaIndex]);
190 if (!jalview.analysis.SeqsetUtils.SeqCharacterUnhash(
191 al.getSequenceAt(FirstSeq), SequenceInfo))
193 throw (new Exception(
195 .getString("exception.couldnt_recover_sequence_props_for_jnet_query")));
199 if (currentView.getDataset() != null)
201 al.setDataset(currentView.getDataset());
208 if (fullAlignment != null)
210 // map gapMap from positions in visible sequence to positions in
217 jalview.io.JnetAnnotationMaker.add_annotation(prediction, al,
218 FirstSeq, true, predMap);
219 SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps.
220 if (fullAlignment == null)
222 alignToProfileSeq(al, profileseq);
224 if (fullAlignment == null && predMap != null)
226 // Adjust input view for gaps
227 // propagate insertions into profile
228 alcsel = ColumnSelection.propagateInsertions(profileseq, al,
234 // transfer to dataset
235 for (AlignmentAnnotation alant : al.getAlignmentAnnotation())
237 if (alant.sequenceRef != null)
239 replaceAnnotationOnAlignmentWith(alant, alant.label,
240 "jalview.jws1.Jpred" + (msaPred ? "MSA" : ""),
245 return new Object[] { al, alcsel }; // , FirstSeq, noMsa};
249 * copied from JabawsCalcWorker
256 public static void replaceAnnotationOnAlignmentWith(
257 AlignmentAnnotation newAnnot, String typeName, String calcId,
260 SequenceI dsseq = aSeq.getDatasetSequence();
261 while (dsseq.getDatasetSequence() != null)
263 dsseq = dsseq.getDatasetSequence();
265 // look for same annotation on dataset and lift this one over
266 List<AlignmentAnnotation> dsan = dsseq.getAlignmentAnnotations(calcId,
268 if (dsan != null && dsan.size() > 0)
270 for (AlignmentAnnotation dssan : dsan)
272 dsseq.removeAlignmentAnnotation(dssan);
275 AlignmentAnnotation dssan = new AlignmentAnnotation(newAnnot);
276 dsseq.addAlignmentAnnotation(dssan);
277 dssan.adjustForAlignment();
281 * Given an alignment where all other sequences except profileseq are aligned
282 * to the ungapped profileseq, insert gaps in the other sequences to realign
283 * them with the residues in profileseq
288 public static void alignToProfileSeq(AlignmentI al, SequenceI profileseq)
290 char gc = al.getGapCharacter();
291 int[] gapMap = profileseq.gapMap();
292 insertGapsInto(al, gc, gapMap);
296 * Given an original sequence, and an alignment involving just the visible
297 * region insert gaps into the alignment and add in the missing residues from
298 * the original sequence
304 public static void insertHiddenResidues(AlignmentI al, char gc,
305 int[] predMap, SequenceI origseq)
307 // orig: asdfPPPPPPPasdfPPPPasdf
309 // al: -----P-P-P---P---P----P---P-P--PP---P---
310 // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS
311 // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS
315 // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf
316 // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS....
317 // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS....
319 // iteration 0: add asdf, append -----P
320 // iteration 1: append -P
321 // iteration 2: append -P
322 // iteration 3: append ---P
323 // iteration 4: append ---P
324 // iteration 5: append ----P
325 // iteration 6: append ---P
326 // iteration 7: append -P
327 // iteration 8: append P
328 // iteration 9: append P
329 // iteration 10: append ---P
330 // tail: append: ---, add asdf
333 SequenceI predseq = al.getSequenceAt(0);
334 int predIdx = 0; // next column of prediction to preserve
335 // positions in original and prediction sequence
336 int lp = origseq.getStart() - 1, predPos = predseq.getStart();
337 for (int r = 0; r < predMap.length; r++)
339 // also need to keep track of trimmed prediction sequence numbering
340 if (predMap[r] - lp > 1)
342 // hidden region insert from origseq
343 String insert = origseq.getSequenceAsString(
344 origseq.findIndex(lp + 1) - 1,
345 origseq.findIndex(predMap[r]) - 1);
347 insertGapsAt(al, gc, alseq.length(), insert.length());
350 // Now update prediction sequence for next position.
352 int predIdxNext = predseq.findIndex(predPos); // everything up
358 if (predIdxNext <= predIdx)
360 predIdxNext = predseq.getLength();
362 // just add in next segment of predseq
363 String predsert = predseq.getSequenceAsString(predIdx, predIdxNext);
365 predIdx = predIdxNext;
371 // add any remaining gaps
373 int predIdxNext = predseq.findIndex(predPos); // everything up
379 if (predIdxNext <= predIdx)
381 predIdxNext = predseq.getLength();
383 // just add in next segment of predseq
384 String predsert = predseq.getSequenceAsString(predIdx, predIdxNext);
386 predIdx = predIdxNext;
389 if (lp < origseq.getEnd())
391 String insert = origseq.getSequenceAsString(
392 origseq.findIndex(lp + 1) - 1, origseq.getLength());
393 insertGapsAt(al, gc, alseq.length(), insert.length());
396 // then add in origseq data.
397 predseq.setSequence(alseq);
400 public static void insertGapsInto(AlignmentI al, char gc, int[] gapMap)
402 // insert gaps into profile
403 for (int lp = 0, r = 0; r < gapMap.length; r++)
405 if (gapMap[r] - lp > 1)
407 insertGapsAt(al, gc, gapMap[r], gapMap[r] - lp);
413 private static void insertGapsAt(AlignmentI al, char gc, int i, int lp)
416 StringBuffer sb = new StringBuffer();
417 for (int s = 0, ns = lp; s < ns; s++)
421 for (int s = 1, ns = al.getHeight(); s < ns; s++)
423 String sq = al.getSequenceAt(s).getSequenceAsString();
424 int diff = i - sq.length();
429 while ((diff = i - sq.length()) > 0)
432 + ((diff >= sb.length()) ? sb.toString() : sb.substring(
435 al.getSequenceAt(s).setSequence(sq);
439 al.getSequenceAt(s).setSequence(
440 sq.substring(0, i) + sb.toString() + sq.substring(i));