From: Jim Procter Date: Thu, 12 May 2016 11:24:30 +0000 (+0100) Subject: JAL-2103 - refactored msa transformation routines and added (failing) test for new... X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=7c337dbac26794f750936e1516ee171097ec7fb3;p=jalview.git JAL-2103 - refactored msa transformation routines and added (failing) test for new transform routine. --- diff --git a/src/jalview/ws/jws1/JPredWSUtils.java b/src/jalview/ws/jws1/JPredWSUtils.java index 35447fd..49c4fb3 100644 --- a/src/jalview/ws/jws1/JPredWSUtils.java +++ b/src/jalview/ws/jws1/JPredWSUtils.java @@ -143,7 +143,7 @@ public class JPredWSUtils } catch (IOException q) { - } finally + } { if (fullAlignment != null) { @@ -202,6 +202,15 @@ public class JPredWSUtils { al.setDataset(null); } + if (fullAlignment != null) + { + // map gapMap from positions in visible sequence to positions in + // original sequence + if (predMap != null) + { + + } + } jalview.io.JnetAnnotationMaker.add_annotation(prediction, al, FirstSeq, true, predMap); SequenceI profileseq = al.getSequenceAt(0); // this includes any gaps. @@ -277,41 +286,121 @@ public class JPredWSUtils { char gc = al.getGapCharacter(); int[] gapMap = profileseq.gapMap(); + insertGapsInto(al, gc, gapMap); + } + + /** + * Given an original sequence (origseq), and an alignment involving just the visible + * region, insert gaps into the alignment and add in the missing residues from + * the original sequence + * + * @param al + * @param gc + * @param predMap + */ + public static void insertHiddenResidues(AlignmentI al, char gc, + int[] predMap, + SequenceI origseq) + { + // orig: asdfPPPPPPPasdfPPPPasdf + // pred: PPPPPPPPPPP + // al: -----P-P-P---P---P----P---P-P--PP---P--- + // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS + // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS + // + // result: + // + // al: asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf + // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS.... 
+ // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS.... + String alseq = ""; + int lsp = 0; + SequenceI predseq = al.getSequenceAt(0); + int predIdx = 0; // next column of prediction to preserve + // positions in original and prediction sequence + int lp = origseq.getStart(), predPos = predseq.getStart(); + for (int r = 0; r < predMap.length; r++) + { + // also need to keep track of trimmed prediction sequence numbering + if (predMap[r] - lp > 1) + { + // hidden region insert from origseq + String insert = origseq.getSequenceAsString( + origseq.findIndex(lp) - 1, + origseq.findIndex(predMap[r]) - 1); + + insertGapsAt(al, gc, alseq.length(), insert.length()); + alseq += insert; + } + // Now update prediction sequence for next position. + { + int predIdxNext = predseq.findIndex(predPos + 1) - 1; + if (predIdxNext <= predIdx) + { + predIdxNext = predseq.getLength(); + } + // just add in next segment of predseq + String predsert = predseq.getSequenceAsString(predIdx, predIdxNext); + alseq += predsert; + predIdx = predIdxNext; + } + lp = predMap[r]; + predPos++; + } + // append final bits + if (lp < origseq.getEnd()) + { + String insert = origseq.getSequenceAsString( + origseq.findIndex(lp) - 1, origseq.getLength()); + insertGapsAt(al, gc, alseq.length(), insert.length()); + alseq += insert; + } + // then add in origseq data. 
+ predseq.setSequence(alseq); + } + + public static void insertGapsInto(AlignmentI al, char gc, int[] gapMap) + { // insert gaps into profile for (int lp = 0, r = 0; r < gapMap.length; r++) { if (gapMap[r] - lp > 1) { - StringBuffer sb = new StringBuffer(); - for (int s = 0, ns = gapMap[r] - lp; s < ns; s++) - { - sb.append(gc); - } - for (int s = 1, ns = al.getHeight(); s < ns; s++) + insertGapsAt(al, gc, gapMap[r], gapMap[r]-lp); + } + lp = gapMap[r]; + } + } + + private static void insertGapsAt(AlignmentI al, char gc, int i, int lp) + { + + StringBuffer sb = new StringBuffer(); + for (int s = 0, ns = lp; s < ns; s++) + { + sb.append(gc); + } + for (int s = 1, ns = al.getHeight(); s < ns; s++) + { + String sq = al.getSequenceAt(s).getSequenceAsString(); + int diff = i - sq.length(); + if (diff > 0) + { + // pad gaps + sq = sq + sb; + while ((diff = i - sq.length()) > 0) { - String sq = al.getSequenceAt(s).getSequenceAsString(); - int diff = gapMap[r] - sq.length(); - if (diff > 0) - { - // pad gaps - sq = sq + sb; - while ((diff = gapMap[r] - sq.length()) > 0) - { - sq = sq - + ((diff >= sb.length()) ? sb.toString() : sb - .substring(0, diff)); - } - al.getSequenceAt(s).setSequence(sq); - } - else - { - al.getSequenceAt(s).setSequence( - sq.substring(0, gapMap[r]) + sb.toString() - + sq.substring(gapMap[r])); - } + sq = sq + + ((diff >= sb.length()) ? 
sb.toString() : sb.substring( + 0, diff)); } + al.getSequenceAt(s).setSequence(sq); + } + else + { + al.getSequenceAt(s).setSequence( + sq.substring(0, i) + sb.toString() + sq.substring(i)); } - lp = gapMap[r]; } } diff --git a/test/jalview/ws/jws1/JPredWSUtilsTest.java b/test/jalview/ws/jws1/JPredWSUtilsTest.java index 0563647..e930e55 100644 --- a/test/jalview/ws/jws1/JPredWSUtilsTest.java +++ b/test/jalview/ws/jws1/JPredWSUtilsTest.java @@ -1,5 +1,11 @@ package jalview.ws.jws1; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import org.testng.Assert; import org.testng.annotations.Test; public class JPredWSUtilsTest @@ -28,4 +34,34 @@ public class JPredWSUtilsTest } + @Test(groups = { "Functional" }) + public void testInsertHiddenResidues() + { + // orig: asdfPPPPPPPasdfPPPPasdf + // pred: PPPPPPPPPPP + // al: -----P-P-P---P---P----P---P-P--PP---P--- + // s1: SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS + // s2: SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS + SequenceI orig = new Sequence("orig", "asdfPPPPPPPasdfPPPPasdf"), pred = new Sequence( + "pred", "PPPPPPPPPPP"), al = new Sequence("al/5-23", + "-----P-P-P---P---P----P---P-P--PP---P---"), s1 = new Sequence( + "s1", "SSSSSSS-SS---S---SSSSSS---S-S--SSSSSSSSS"), s2 = new Sequence( + "s2", "SSSSSSS-SSSSSSSSSSS----SSS-S-SSS-----SSS"); + + AlignmentI alpred = new Alignment(new SequenceI[] { al, s1, s2 }); + JPredWSUtils.insertHiddenResidues(alpred, '.', new int[] { 5, 6, 7, 8, + 9, 10, 11, 16, 17, 18, 19 }, orig); + Assert.assertEquals(alpred.getSequenceAt(2).getSequenceAsString(), + "....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS...."); + Assert.assertEquals(alpred.getSequenceAt(1).getSequenceAsString(), + "....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS...."); + Assert.assertEquals(alpred.getSequenceAt(0).getSequenceAsString(), + "asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf"); + + // result: + // + // al: 
asdf-----P-P-P---P---P----P---Pasdf-P--PP---P---asdf + // s1: ....SSSSSSS-SS---S---SSSSSS---S....-S--SSSSSSSSS.... + // s2: ....SSSSSSS-SSSSSSSSSSS----SSS-....S-SSS-----SSS.... + } }