From dcaf904aae4b64a04b5f606bf5278fc536c6cbaf Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 7 Oct 2015 13:58:20 +0100 Subject: [PATCH] JAL-1499 refactored processGeneOrDomain, expanded test --- src/jalview/io/MegaFile.java | 160 +++++++++++++++++++++++++++++++------ test/jalview/io/MegaFileTest.java | 44 +++++++--- 2 files changed, 169 insertions(+), 35 deletions(-) diff --git a/src/jalview/io/MegaFile.java b/src/jalview/io/MegaFile.java index c7caa3b..eb9868b 100644 --- a/src/jalview/io/MegaFile.java +++ b/src/jalview/io/MegaFile.java @@ -105,6 +105,8 @@ public class MegaFile extends AlignFile private static final String CODONSTART = "CodonStart"; + private static final String LABEL = "Label"; + /* * names of properties to save to the alignment (may affect eventual output * format) @@ -241,6 +243,10 @@ public class MegaFile extends AlignFile { parseGeneOrDomain(dataLine); } + else if (dataLine.startsWith(BANG + LABEL)) + { + parseLabel(dataLine); + } else { currentSequenceId = parseDataLine(dataLine); @@ -269,6 +275,17 @@ public class MegaFile extends AlignFile } /** + * Parse a !Label + * + * @param dataLine + */ + protected void parseLabel(String dataLine) + { + // TODO Auto-generated method stub + + } + + /** * Post-processing after reading one block of interleaved data */ protected void endOfDataBlock() @@ -362,42 +379,136 @@ public class MegaFile extends AlignFile protected void processGeneOrDomain(String gene, String domain, String property, String codonStart) { - boolean domainEnd = "domainend".equalsIgnoreCase(property); - /* - * If we have been processing a Domain or Gene, and this does not continue - * it, then close it off (generate sequence features for it). Do Domain - * first as it is in the context of the enclosing gene if any. 
+ * the order of processing below ensures that we correctly capture where a + * domain is in the context of an enclosing gene */ - if (this.currentDomain != null) + processDomainEnd(domain, property); + + processGeneEnd(gene); + + processGeneStart(gene); + + processDomainStart(domain, property); + + // TODO save codonStart if we plan to involve it in 'translate as cDNA' + } + + /** + * Start a sequence feature for a declared domain unless it continues the + * current one; a null domain or a 'domainend' property clears the current domain + * + * @param domain the declared domain name, or null if none was declared + * @param property + */ + protected void processDomainStart(String domain, String property) + { + if (domain == null || "domainend".equalsIgnoreCase(property)) { - if (!this.currentDomain.equals(domain) || domainEnd) - { - String description = currentDomain - + (currentGene == null ? "" : " (" + currentGene + ")"); - createFeature(DOMAIN, description, domainStart); - } + currentDomain = null; + return; } - if (this.currentGene != null && !this.currentGene.equals(gene)) + String verboseDomain = makeVerboseDomainName(domain, property); + if (!verboseDomain.equals(currentDomain)) { - createFeature(GENE, currentGene, geneStart); + // compare verbose names (as processDomainEnd does) so a repeat is no restart + startSequenceFeature(domainStart); + + currentDomain = verboseDomain; } + } - /* - * and if we have declared a Gene or Domain which does not continue the - * current one, then record its start positions per sequence - */ + /** + * If we have declared a gene, and it is not continuing, start a sequence + * feature for it + * + * @param gene + */ + protected void processGeneStart(String gene) + { if (gene != null && !gene.equals(currentGene)) { startSequenceFeature(geneStart); } - if (domain != null && !domain.equals(currentDomain)) + currentGene = gene; + } + + /** + * If a domain was in progress and is not being continued, make a sequence + * feature for it. NOTE(review): the comparison uses the previous gene, so a + * same-named domain under a new gene looks like a continuation - confirm + * @param domain + * @param property + * @return true if a feature is created, else false + */ + protected boolean processDomainEnd(String domain, String 
property) + { + String verboseDomain = makeVerboseDomainName(domain, property); + if (this.currentDomain != null) { - startSequenceFeature(domainStart); + boolean domainEnded = "domainend".equalsIgnoreCase(property); + if (!this.currentDomain.equals(verboseDomain) || domainEnded) + { + createFeature(DOMAIN, currentDomain, domainStart); + return true; + } } + return false; + } - currentGene = gene; - currentDomain = domainEnd ? null : domain; + /** + * If we have been processing a gene, and it is not being continued, then make + * a sequence feature for the gene just ended + * + * @param gene + * @return true if a feature is created, else false + */ + protected boolean processGeneEnd(String gene) + { + boolean created = false; + /* + * If we were processing a gene and now have either another, or none, create + * a sequence feature for that gene + */ + if (this.currentGene != null && !this.currentGene.equals(gene)) + { + createFeature(GENE, currentGene, geneStart); + created = true; + } + + return created; + } + + /** + * Makes an expanded descriptive name for Domain if possible e.g. + * "Intron1 (Adh Coding)". Currently incorporates the current gene name (if + * any) and the Coding/Noncoding property value (if given). + * + * @param domain + * @param property + * @return + */ + protected String makeVerboseDomainName(String domain, String property) + { + String verboseDomain = domain; + if (domain != null) + { + String coding = ""; + if ("Exon".equalsIgnoreCase(property) + || "Coding".equalsIgnoreCase(property)) + { + coding = " Coding"; + } + else if ("Intron".equalsIgnoreCase(property) + || "Noncoding".equalsIgnoreCase(property)) + { + coding = " Noncoding"; + } + verboseDomain = domain + + (currentGene == null ? 
"" : " (" + currentGene + coding + + ")"); + } + return verboseDomain; } /** @@ -1391,9 +1502,8 @@ public class MegaFile extends AlignFile { if (this.interleaved != null && isIt != this.interleaved.booleanValue()) { - throw new FileFormatException( - "Parse error: mix of interleaved and noninterleaved detected, at line: " - + dataLine); + throw new FileFormatException("Parse error: interleaved was " + !isIt + + " but now seems to be " + isIt + ", at line: " + dataLine); } this.interleaved = new Boolean(isIt); setAlignmentProperty(PROP_INTERLEAVED, interleaved.toString()); diff --git a/test/jalview/io/MegaFileTest.java b/test/jalview/io/MegaFileTest.java index 2766610..1f8e4d6 100644 --- a/test/jalview/io/MegaFileTest.java +++ b/test/jalview/io/MegaFileTest.java @@ -177,7 +177,7 @@ public class MegaFileTest { assertEquals( "Unexpected exception message", - "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ", + "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ", e.getMessage()); } @@ -623,14 +623,22 @@ public class MegaFileTest "!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" + "#U455 GGGGGG\n" + "#CPZANT AAAAAA\n\n" + - "!Domain=Intron1 Gene=Adh;\n" + + "!Domain=Intron1 Property=Intron Gene=Adh;\n" + "#U455 tttttt\n" + "#CPZANT cccccc\n\n" + - "!Domain=Exon2 Gene=Adh Property=Coding CodonStart=1;\n" + + "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" + "#U455 aaaaaa\n" + "#CPZANT gggggg\n\n" + // explicit end of Exon2, implicit end of Adh: "!Domain=Exon2 Property=domainend;\n" + + "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" + + "#U455 GGGGGG\n" + + "#CPZANT AAAAAA\n\n" + + // end Opsin, start MEF2A + "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" + + "#U455 tttttt\n" + + "#CPZANT cccccc\n\n" + + // end MEF2A "!Domain=BindingSite;\n" + "#U455 CCCCCC\n" + "#CPZANT TTTTTT\n\n"; @@ -642,10 +650,10 @@ public class MegaFileTest assertEquals("Expected two sequences", 2, 
seqs.size()); // check sequence data assertEquals("First sequence data wrong", - "CCCCCCGGGGGGttttttaaaaaaCCCCCC", seqs.get(0) + "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0) .getSequenceAsString()); assertEquals("Second sequence data wrong", - "TTTTTTAAAAAAccccccggggggTTTTTT", seqs.get(1) + "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1) .getSequenceAsString()); /* @@ -656,17 +664,17 @@ public class MegaFileTest SequenceFeature[] sfs = seq.getSequenceFeatures(); // features are added in the order in which their end is found // (Domain before Gene when they end together) - assertEquals(5, sfs.length); + assertEquals(9, sfs.length); // TODO settle which way round type/description go! - assertEquals("Exon1 (Adh)", sfs[0].type); + assertEquals("Exon1 (Adh Coding)", sfs[0].type); assertEquals("Domain", sfs[0].description); assertEquals(7, sfs[0].begin); assertEquals(12, sfs[0].end); - assertEquals("Intron1 (Adh)", sfs[1].type); + assertEquals("Intron1 (Adh Noncoding)", sfs[1].type); assertEquals("Domain", sfs[1].description); assertEquals(13, sfs[1].begin); assertEquals(18, sfs[1].end); - assertEquals("Exon2 (Adh)", sfs[2].type); + assertEquals("Exon2 (Adh Coding)", sfs[2].type); assertEquals("Domain", sfs[2].description); assertEquals(19, sfs[2].begin); assertEquals(24, sfs[2].end); @@ -674,10 +682,26 @@ public class MegaFileTest assertEquals("Gene", sfs[3].description); assertEquals(7, sfs[3].begin); assertEquals(24, sfs[3].end); - assertEquals("BindingSite", sfs[4].type); + assertEquals("Intron1 (Opsin Noncoding)", sfs[4].type); assertEquals("Domain", sfs[4].description); assertEquals(25, sfs[4].begin); assertEquals(30, sfs[4].end); + assertEquals("Opsin", sfs[5].type); + assertEquals("Gene", sfs[5].description); + assertEquals(25, sfs[5].begin); + assertEquals(30, sfs[5].end); + assertEquals("Exon1 (MEF2A Coding)", sfs[6].type); + assertEquals("Domain", sfs[6].description); + assertEquals(31, sfs[6].begin); + assertEquals(36, sfs[6].end); + 
 assertEquals("MEF2A", sfs[7].type); + assertEquals("Gene", sfs[7].description); + assertEquals(31, sfs[7].begin); + assertEquals(36, sfs[7].end); + assertEquals("BindingSite", sfs[8].type); + assertEquals("Domain", sfs[8].description); + assertEquals(37, sfs[8].begin); + assertEquals(42, sfs[8].end); } } } -- 1.7.10.2