package jalview.ws.jws1; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SequenceI; import jalview.io.FileParse; import jalview.io.FormatAdapter; import jalview.util.MessageManager; import java.io.IOException; import java.util.Hashtable; import java.util.List; /** * extraction of processing routines to allow mocking * * @author jprocter * */ public class JPredWSUtils { /** * Process data extracted from service result set to generate a JPred result * view. * * @param currentView * * @param input * - original input alignment * @param gapChar * - character to use for reconstructing alignment used for * prediction * @param SequenceInfo * - from SeqHash * @param msaPred * - true if a prediction based on existing MSA * @param predMap * - position * @param result_PredFile * @param result_Aligfile * @param full_alignment * @return { Alignment, ColumnSelection } * @throws Exception */ public static Object[] processJnetResult(AlignmentI currentView, AlignmentView input, char gapChar, Hashtable SequenceInfo, boolean msaPred, int[] predMap, String result_PredFile, String result_Aligfile, FileParse full_alignment) throws Exception { AlignmentI al = null; ColumnSelection alcsel = null; // the position of the query sequence in Alignment al int FirstSeq = -1; // the position of the original sequence in the array of // Sequences in the input object that this job holds a // prediction for int msaIndex = 0; // JPredFile prediction = new JPredFile("C:/JalviewX/files/jpred.txt", // "File"); jalview.io.JPredFile prediction = new jalview.io.JPredFile( result_PredFile, "Paste"); SequenceI[] preds = prediction.getSeqsAsArray(); jalview.bin.Cache.log.debug("Got prediction profile."); if (msaPred && (result_Aligfile != null)) { jalview.bin.Cache.log.debug("Getting associated alignment."); // we ignore the returned alignment if we only predicted on a single // sequence String format = new jalview.io.IdentifyFile().identify( result_Aligfile, "Paste"); if (jalview.io.FormatAdapter.isValidFormat(format)) { SequenceI sqs[]; if (predMap != null) { Object[] alandcolsel = input .getAlignmentAndColumnSelection(gapChar); sqs = (SequenceI[]) alandcolsel[0]; al = new Alignment(sqs); alcsel = (ColumnSelection) alandcolsel[1]; } else { al = new FormatAdapter().readFile(result_Aligfile, "Paste", format); sqs = new SequenceI[al.getHeight()]; for (int i = 0, j = al.getHeight(); i < j; i++) { sqs[i] = al.getSequenceAt(i); } if (!jalview.analysis.SeqsetUtils.deuniquify(SequenceInfo, sqs)) { throw (new Exception( MessageManager .getString("exception.couldnt_recover_sequence_properties_for_alignment"))); } } FirstSeq = 0; if (currentView.getDataset() != null) { al.setDataset(currentView.getDataset()); } else { al.setDataset(null); } jalview.io.JnetAnnotationMaker.add_annotation(prediction, al, FirstSeq, false, predMap); } else { throw (new Exception(MessageManager.formatMessage( "exception.unknown_format_for_file", new String[] { format, result_Aligfile }))); } } else { AlignmentI fullAlignment = null; try { // read full alignment if present. if (!msaPred && full_alignment != null) { fullAlignment = new FormatAdapter().readFromFile(full_alignment, "FASTA"); } } catch (IOException q) { } { if (fullAlignment != null) { al = fullAlignment; FirstSeq = 0; } else { al = new Alignment(preds); FirstSeq = prediction.getQuerySeqPosition(); } } if (predMap != null) { // map the prediction onto the query sequence, excluding positions // corresponding to hidden regions in the original input. char gc = gapChar; SequenceI[] sqs = (SequenceI[]) input .getAlignmentAndColumnSelection(gc)[0]; if (msaIndex >= sqs.length) { throw new Error( MessageManager .getString("error.implementation_error_invalid_msa_index_for_job")); } // // // Uses RemoveGapsCommand // // new jalview.commands.RemoveGapsCommand( MessageManager.getString("label.remove_gaps"), new SequenceI[] { sqs[msaIndex] }, currentView); if (fullAlignment == null) { // just replace trimmed sequence in prediction profile with full // length sequence SequenceI profileseq = al.getSequenceAt(FirstSeq); profileseq.setSequence(sqs[msaIndex].getSequenceAsString()); } else { insertHiddenResidues(al, '.', predMap, sqs[msaIndex]); } } if (!jalview.analysis.SeqsetUtils.SeqCharacterUnhash( al.getSequenceAt(FirstSeq), SequenceInfo)) { throw (new Exception( MessageManager .getString("exception.couldnt_recover_sequence_props_for_jnet_query"))); } else { if (currentView.getDataset() != null) { al.setDataset(currentView.getDataset()); } else { al.setDataset(null); } if (fullAlignment != null) { // map gapMap from positions in visible sequence to positions in // original sequence if (predMap != null) { } } jalview.io.JnetAnnotationMaker.add_annotation(prediction, al, FirstSeq, true, predMap); SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps. if (fullAlignment == null) { alignToProfileSeq(al, profileseq); } if (fullAlignment == null && predMap != null) { // Adjust input view for gaps // propagate insertions into profile alcsel = ColumnSelection.propagateInsertions(profileseq, al, input); } } } // transfer to dataset for (AlignmentAnnotation alant : al.getAlignmentAnnotation()) { if (alant.sequenceRef != null) { replaceAnnotationOnAlignmentWith(alant, alant.label, "jalview.jws1.Jpred" + (msaPred ? "MSA" : ""), alant.sequenceRef); } } return new Object[] { al, alcsel }; // , FirstSeq, noMsa}; } /** * copied from JabawsCalcWorker * * @param newAnnot * @param typeName * @param calcId * @param aSeq */ public static void replaceAnnotationOnAlignmentWith( AlignmentAnnotation newAnnot, String typeName, String calcId, SequenceI aSeq) { SequenceI dsseq = aSeq.getDatasetSequence(); while (dsseq.getDatasetSequence() != null) { dsseq = dsseq.getDatasetSequence(); } // look for same annotation on dataset and lift this one over List dsan = dsseq.getAlignmentAnnotations(calcId, typeName); if (dsan != null && dsan.size() > 0) { for (AlignmentAnnotation dssan : dsan) { dsseq.removeAlignmentAnnotation(dssan); } } AlignmentAnnotation dssan = new AlignmentAnnotation(newAnnot); dsseq.addAlignmentAnnotation(dssan); dssan.adjustForAlignment(); } /** * Given an alignment where all other sequences except profileseq are aligned * to the ungapped profileseq, insert gaps in the other sequences to realign * them with the residues in profileseq * * @param al * @param profileseq */ public static void alignToProfileSeq(AlignmentI al, SequenceI profileseq) { char gc = al.getGapCharacter(); int[] gapMap = profileseq.gapMap(); insertGapsInto(al, gc, gapMap); } /** * Given an original sequence, and an alignment involving just the visible * region insert gaps into the alignment and add in the missing residues from * the original sequence * * @param al * @param c * @param profileseq */ public static void insertHiddenResidues(AlignmentI al, char gc, int[] predMap, SequenceI origseq) { // orig: asdfPPPPPPPasdfPPPPasdf // pred: PPPPPPPPPPP // al: -----P-P-P---P---P----P---P-P--PP---P--- // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS // // result: // // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS.... // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS.... // iteration 0: add asdf, append -----P // iteration 1: append -P // iteration 2: append -P // iteration 3: append ---P // iteration 4: append ---P // iteration 5: append ----P // iteration 6: append ---P // iteration 7: append -P // iteration 8: append P // iteration 9: append P // iteration 10: append ---P // tail: append: ---, add asdf String alseq = ""; SequenceI predseq = al.getSequenceAt(0); int predIdx = 0; // next column of prediction to preserve // positions in original and prediction sequence int lp = origseq.getStart() - 1, predPos = predseq.getStart(); for (int r = 0; r < predMap.length; r++) { // also need to keep track of trimmed prediction sequence numbering if (predMap[r] - lp > 1) { // hidden region insert from origseq String insert = origseq.getSequenceAsString( origseq.findIndex(lp + 1) - 1, origseq.findIndex(predMap[r]) - 1); insertGapsAt(al, gc, alseq.length(), insert.length()); alseq += insert; } // Now update prediction sequence for next position. { int predIdxNext = predseq.findIndex(predPos); // everything up // to the current // position in the // prediction // sequence // alignment if (predIdxNext <= predIdx) { predIdxNext = predseq.getLength(); } // just add in next segment of predseq String predsert = predseq.getSequenceAsString(predIdx, predIdxNext); alseq += predsert; predIdx = predIdxNext; } lp = predMap[r]; predPos++; } // append final bits // add any remaining gaps { int predIdxNext = predseq.findIndex(predPos); // everything up // to the current // position in the // prediction // sequence // alignment if (predIdxNext <= predIdx) { predIdxNext = predseq.getLength(); } // just add in next segment of predseq String predsert = predseq.getSequenceAsString(predIdx, predIdxNext); alseq += predsert; predIdx = predIdxNext; } if (lp < origseq.getEnd()) { String insert = origseq.getSequenceAsString( origseq.findIndex(lp + 1) - 1, origseq.getLength()); insertGapsAt(al, gc, alseq.length(), insert.length()); alseq += insert; } // then add in origseq data. predseq.setSequence(alseq); } public static void insertGapsInto(AlignmentI al, char gc, int[] gapMap) { // insert gaps into profile for (int lp = 0, r = 0; r < gapMap.length; r++) { if (gapMap[r] - lp > 1) { insertGapsAt(al, gc, gapMap[r], gapMap[r] - lp); } lp = gapMap[r]; } } private static void insertGapsAt(AlignmentI al, char gc, int i, int lp) { StringBuffer sb = new StringBuffer(); for (int s = 0, ns = lp; s < ns; s++) { sb.append(gc); } for (int s = 1, ns = al.getHeight(); s < ns; s++) { String sq = al.getSequenceAt(s).getSequenceAsString(); int diff = i - sq.length(); if (diff > 0) { // pad gaps sq = sq + sb; while ((diff = i - sq.length()) > 0) { sq = sq + ((diff >= sb.length()) ? sb.toString() : sb.substring( 0, diff)); } al.getSequenceAt(s).setSequence(sq); } else { al.getSequenceAt(s).setSequence( sq.substring(0, i) + sb.toString() + sq.substring(i)); } } } }