From 4a85628075aa27923821a910758c9b9d69cb1708 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 23 Oct 2015 10:27:20 +0100 Subject: [PATCH 1/1] JAL-1499 slight refactoring of handling interleaved data blocks --- src/jalview/io/MegaFile.java | 20 ++++++++++++++------ test/jalview/io/MegaFileTest.java | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/jalview/io/MegaFile.java b/src/jalview/io/MegaFile.java index 5a76d07..73cfe23 100644 --- a/src/jalview/io/MegaFile.java +++ b/src/jalview/io/MegaFile.java @@ -175,7 +175,7 @@ public class MegaFile extends AlignFile private Boolean nucleotide; // set once we have seen one block of interleaved data - private boolean firstDataBlockRead = false; + private boolean seenAllSequences = false; // this can be True, False or null (meaning we don't know yet) private Boolean interleaved; @@ -284,11 +284,13 @@ public class MegaFile extends AlignFile if (upperCased.startsWith(BANG + GENE.toUpperCase()) || upperCased.startsWith(BANG + DOMAIN.toUpperCase())) { + endDataBlock(); parseGeneOrDomain(dataLine); } else if (upperCased.startsWith(BANG + LABEL.toUpperCase())) { parseLabel(dataLine); + endDataBlock(); } else { @@ -300,7 +302,7 @@ public class MegaFile extends AlignFile /* * Blank line after processing some data... */ - endOfDataBlock(); + endDataBlock(); } dataLine = nextNonCommentLine(); } @@ -410,10 +412,8 @@ public class MegaFile extends AlignFile /** * Post-processing after reading one block of interleaved data */ - protected void endOfDataBlock() + protected void endDataBlock() { - this.firstDataBlockRead = true; - padAnnotations(labelAnnotations); } @@ -1029,6 +1029,14 @@ public class MegaFile extends AlignFile // and a placeholder for any SequenceFeature found sequenceFeatures.put(currentId, new ArrayList()); } + else + { + /* + * we are appending to a previously seen sequence; flag that we have seen + * all sequences + */ + this.seenAllSequences = true; + } return sb; } @@ -1049,7 +1057,7 @@ public class MegaFile extends AlignFile /* * New sequence found in second or later data block - error. */ - if (this.firstDataBlockRead && !seqData.containsKey(seqId)) + if (this.seenAllSequences && !seqData.containsKey(seqId)) { throw new FileFormatException( "Parse error: misplaced new sequence starting at " + dataLine); diff --git a/test/jalview/io/MegaFileTest.java b/test/jalview/io/MegaFileTest.java index 4a3918e..860a898 100644 --- a/test/jalview/io/MegaFileTest.java +++ b/test/jalview/io/MegaFileTest.java @@ -74,7 +74,8 @@ public class MegaFileTest + "!TITLE Interleaved sequence data\n\n" + "#U455 ABCDEF\n" + "#CPZANT MNOPQR\n\n" - + "#U456 KLMNOP\n"; + + "#U455 GHIJKL\n" + + "#U456 KLMNOP\n"; // wossis? // interleaved with description, bases/gaps in triplet groups private static final String INTERLEAVED_WITH_DESCRIPTION = @@ -1019,4 +1020,35 @@ public class MegaFileTest assertEquals("Noncoding", MegaFile.getPropertyFromAnnotation(3, aa)); assertEquals("Coding", MegaFile.getPropertyFromAnnotation(4, aa)); } + + //@formatter:on + + /** + * Test parse of interleaved data with no blank lines to separate blocks of + * sequence data; to confirm we can handle this correctly + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testParse_interleaved_noBlankLines() throws IOException + { + String data = INTERLEAVED.replace("\n\n", "\n"); + MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE); + assertEquals("Title not as expected", "Interleaved sequence data", + testee.getAlignmentProperty(MegaFile.PROP_TITLE)); + Vector seqs = testee.getSeqs(); + // should be 2 sequences + assertEquals("Expected two sequences", 2, seqs.size()); + // check sequence names correct and order preserved + assertEquals("First sequence id wrong", "U455", seqs.get(0).getName()); + assertEquals("Second sequence id wrong", "CPZANT", seqs.get(1) + .getName()); + // check sequence data + assertEquals("First sequence data wrong", "ABCDEFKLMNOP", seqs.get(0) + .getSequenceAsString()); + assertEquals("Second sequence data wrong", "MNOPQRWXYZGC", seqs.get(1) + .getSequenceAsString()); + assertTrue("File format is not flagged as interleaved", + testee.isInterleaved()); + } } -- 1.7.10.2