From 5413e3be3afeac58aa213af08b5c355981afc491 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 1 Oct 2015 11:46:33 +0100 Subject: [PATCH] JAL-1499 parse files with position number comments like [24] --- src/jalview/io/MegaFile.java | 36 ++++++++++++++---------------- test/jalview/io/MegaFileTest.java | 44 +++++++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/jalview/io/MegaFile.java b/src/jalview/io/MegaFile.java index 2f5e35a..1126cff 100644 --- a/src/jalview/io/MegaFile.java +++ b/src/jalview/io/MegaFile.java @@ -252,8 +252,10 @@ public class MegaFile extends AlignFile } /** - * Returns the next line that is not a comment, or null at end of file. - * Comments in MEGA are within [ ] brackets, and may be nested. + * Returns the next non-comment line (or part line), or null at end of file. + * Comments in MEGA are within [ ] brackets, and may be nested. They may occur + * anywhere within a line (for example at the end with position numbers); this + * method returns the line with any comments removed. * * @param depth * current depth of nesting of comments while parsing @@ -272,16 +274,6 @@ public class MegaFile extends AlignFile } return data; } - int leftBracket = data.indexOf(COMMENT_START); - - /* - * reject unnested comment following data on the same line - */ - if (depth == 0 && leftBracket > 0) - { - throw new FileFormatException( - "Can't parse comment following data at " + data); - } /* * If we are in a (possibly nested) comment after parsing this line, keep @@ -295,15 +287,10 @@ public class MegaFile extends AlignFile else { /* - * not in a comment by end of this line; return what is left (or the next - * line if that is empty) + * not in a comment by end of this line; return what is left */ String nonCommentPart = getNonCommentContent(data, depth); - // if (nonCommentPart.length() > 0) - // { - return nonCommentPart; - // } - // return nextNonCommentLine(0); + return nonCommentPart; } } @@ -1215,10 +1202,19 @@ public class MegaFile extends AlignFile public String print(AlignmentI al) { this.nucleotide = al.isNucleotide(); + String lineLength = (String) al.getProperty(PROP_LINELENGTH); this.positionsPerLine = lineLength == null ? DEFAULT_LINE_LENGTH : Integer .parseInt(lineLength); - return printHeaders(al) + print(al.getSequencesArray()); + + String interleave = (String) al.getProperty(PROP_INTERLEAVED); + if (interleave != null) + { + this.interleaved = Boolean.valueOf(interleave); + } + + String headers = printHeaders(al); + return headers + print(al.getSequencesArray()); } /** diff --git a/test/jalview/io/MegaFileTest.java b/test/jalview/io/MegaFileTest.java index 309ffee..477f982 100644 --- a/test/jalview/io/MegaFileTest.java +++ b/test/jalview/io/MegaFileTest.java @@ -88,7 +88,7 @@ public class MegaFileTest //@formatter:on /** - * Test paste of interleaved mega format data. + * Test parse of interleaved mega format data. * * @throws IOException */ @@ -115,7 +115,7 @@ public class MegaFileTest } /** - * Test paste of noninterleaved mega format data. + * Test parse of noninterleaved mega format data. * * @throws IOException */ @@ -495,7 +495,7 @@ public class MegaFileTest //@formatter:on /** - * Test paste of interleaved mega format data where the identity character is + * Test parse of interleaved mega format data where the identity character is * used in sequences after the first * * @throws IOException @@ -531,7 +531,7 @@ public class MegaFileTest } /** - * Test paste of noninterleaved format data including identity symbol + * Test parse of noninterleaved format data including identity symbol * * @throws IOException */ @@ -567,4 +567,40 @@ public class MegaFileTest assertFalse("File format is not flagged as noninterleaved", testee.isInterleaved()); } + + //@formatter:on + + /** + * Test parse of interleaved format data including position number comments. + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testParse_interleavedWithPositionNumber() throws IOException + { + //@formatter:off + MegaFile testee = new MegaFile("#MEGA\n"+ + "TITLE: Interleaved sequence data\n\n" + + "#U455 ABCDEF [6]\n" + + "#CPZANT MNOPQR [6]\n\n" + + "#U455 KLMNOP [12]\n" + + "#CPZANT WXYZGC [12]\n", AppletFormatAdapter.PASTE); + //@formatter:on + assertEquals("Title not as expected", "Interleaved sequence data", + testee.getAlignmentProperty(MegaFile.PROP_TITLE)); + Vector seqs = testee.getSeqs(); + // should be 2 sequences + assertEquals("Expected two sequences", 2, seqs.size()); + // check sequence names correct and order preserved + assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); + assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) + .getName()); + // check sequence data + assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) + .getSequenceAsString()); + assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1) + .getSequenceAsString()); + assertTrue("File format is not flagged as interleaved", + testee.isInterleaved()); + } } -- 1.7.10.2