private static final String CODONSTART = "CodonStart";
+ private static final String LABEL = "Label"; // prefixed with BANG to recognise Label command lines
+
/*
* names of properties to save to the alignment (may affect eventual output
* format)
{
parseGeneOrDomain(dataLine);
}
+ else if (dataLine.startsWith(BANG + LABEL))
+ {
+ parseLabel(dataLine);
+ }
else
{
currentSequenceId = parseDataLine(dataLine);
}
/**
+ * Handles a data line that starts with the Label command (BANG + LABEL, i.e.
+ * "!Label"). This is a placeholder which is not yet implemented, so such a
+ * line is currently accepted but has no effect.
+ *
+ * @param dataLine
+ * the line of text to parse, starting with the Label command
+ */
+ protected void parseLabel(String dataLine)
+ {
+ // TODO not yet implemented - Label lines are currently ignored
+
+ }
+
+ /**
* Post-processing after reading one block of interleaved data
*/
protected void endOfDataBlock()
protected void processGeneOrDomain(String gene, String domain,
String property, String codonStart)
{
- boolean domainEnd = "domainend".equalsIgnoreCase(property);
-
/*
- * If we have been processing a Domain or Gene, and this does not continue
- * it, then close it off (generate sequence features for it). Do Domain
- * first as it is in the context of the enclosing gene if any.
+ * The order of the four steps below matters. A domain that is ending is
+ * closed first, while currentGene still holds its enclosing gene; then the
+ * gene (if ending) is closed and the newly declared gene is recorded as
+ * current; only then is the new domain started, so that its verbose name is
+ * built in the context of the new gene.
*/
- if (this.currentDomain != null)
+ processDomainEnd(domain, property);
+
+ processGeneEnd(gene);
+
+ processGeneStart(gene);
+
+ processDomainStart(domain, property);
+
+ // codonStart is not used yet
+ // TODO save codonStart if we plan to involve it in 'translate as cDNA'
+ }
+
+ /**
+ * If a domain is declared and it does not continue the current one, start a
+ * sequence feature for it and record it (under its verbose name) as the
+ * current domain. If no domain is declared, or the property is "domainend",
+ * then there is no longer a current domain.
+ * <p>
+ * The comparison is made on the verbose name, because that is the form held
+ * in currentDomain and the form which processDomainEnd compares. Comparing
+ * the bare domain name would treat a repeated declaration of a continuing
+ * domain inside a gene as a new domain, and start its feature again.
+ *
+ * @param domain
+ * the domain name now declared, may be null
+ * @param property
+ * the Property value now declared, may be null; "domainend"
+ * (ignoring case) means the domain is ending, not starting
+ */
+ protected void processDomainStart(String domain, String property)
+ {
+ if (domain == null || "domainend".equalsIgnoreCase(property))
{
- if (!this.currentDomain.equals(domain) || domainEnd)
- {
- String description = currentDomain
- + (currentGene == null ? "" : " (" + currentGene + ")");
- createFeature(DOMAIN, description, domainStart);
- }
+ // when called from processGeneOrDomain, any domain that was in progress
+ // has already been closed by processDomainEnd, so forget it here
+ currentDomain = null;
+ return;
}
- if (this.currentGene != null && !this.currentGene.equals(gene))
+
+ // non-null, since domain is not null at this point
+ String verboseDomain = makeVerboseDomainName(domain, property);
+ if (!verboseDomain.equals(currentDomain))
{
- createFeature(GENE, currentGene, geneStart);
+ startSequenceFeature(domainStart);
+
+ currentDomain = verboseDomain;
}
+ }
- /*
- * and if we have declared a Gene or Domain which does not continue the
- * current one, then record its start positions per sequence
- */
+ /**
+ * If a gene is declared, and it is not the gene currently being processed,
+ * calls startSequenceFeature for geneStart (presumably recording where the
+ * new gene begins - confirm against that method). In every case the current
+ * gene is then set to the supplied value, so a null gene clears it.
+ *
+ * @param gene
+ * the gene name now declared, may be null
+ */
+ protected void processGeneStart(String gene)
+ {
if (gene != null && !gene.equals(currentGene))
{
startSequenceFeature(geneStart);
}
- if (domain != null && !domain.equals(currentDomain))
+ currentGene = gene;
+ }
+
+ /**
+ * If we have been processing a domain, and it is not being continued, then
+ * make a sequence feature for the domain just ended. The domain counts as
+ * continued only if the verbose name of the declared domain (built by
+ * makeVerboseDomainName, which reads the current gene) equals currentDomain
+ * and the property is not "domainend". Note that currentDomain itself is not
+ * modified here.
+ * <p>
+ * NOTE(review): the verbose name is built from the gene that is current when
+ * this method is called. If the same domain name is declared under a
+ * different gene, it looks as if the old domain would be treated as
+ * continuing and not closed - confirm whether that case can occur.
+ *
+ * @param domain
+ * the domain name now declared, may be null
+ * @param property
+ * the Property value now declared, may be null; "domainend"
+ * (ignoring case) forces the current domain to end
+ * @return true if a feature is created, else false
+ */
+ protected boolean processDomainEnd(String domain, String property)
+ {
+ String verboseDomain = makeVerboseDomainName(domain, property);
+ if (this.currentDomain != null)
{
- startSequenceFeature(domainStart);
+ boolean domainEnded = "domainend".equalsIgnoreCase(property);
+ if (!this.currentDomain.equals(verboseDomain) || domainEnded)
+ {
+ createFeature(DOMAIN, currentDomain, domainStart);
+ return true;
+ }
}
+ return false;
+ }
- currentGene = gene;
- currentDomain = domainEnd ? null : domain;
+ /**
+ * Closes off the gene being processed, if there is one and the supplied gene
+ * does not continue it (that is, it is a different gene, or null), by
+ * creating a sequence feature for it.
+ *
+ * @param gene
+ * the gene name now declared, may be null
+ * @return true if a feature is created, else false
+ */
+ protected boolean processGeneEnd(String gene)
+ {
+ if (this.currentGene == null || this.currentGene.equals(gene))
+ {
+ // either no gene is in progress, or the same gene is continuing
+ return false;
+ }
+
+ createFeature(GENE, currentGene, geneStart);
+ return true;
+ }
+
+ /**
+ * Builds a descriptive name for a domain. When there is a current gene, the
+ * result is the domain name followed by the gene name in brackets, e.g.
+ * "Intron1 (Adh)". If in addition the property is Exon or Coding, or Intron
+ * or Noncoding (ignoring case), then " Coding" or " Noncoding" respectively
+ * follows the gene name, e.g. "Intron1 (Adh Noncoding)". With no current gene
+ * the domain name is returned unchanged, whatever the property.
+ *
+ * @param domain
+ * the domain name, may be null
+ * @param property
+ * the Property value, may be null
+ * @return the descriptive name, or null if domain is null
+ */
+ protected String makeVerboseDomainName(String domain, String property)
+ {
+ if (domain == null)
+ {
+ return null;
+ }
+ if (currentGene == null)
+ {
+ return domain;
+ }
+
+ boolean isCoding = "Exon".equalsIgnoreCase(property)
+ || "Coding".equalsIgnoreCase(property);
+ boolean isNoncoding = "Intron".equalsIgnoreCase(property)
+ || "Noncoding".equalsIgnoreCase(property);
+ String codingSuffix = isCoding ? " Coding"
+ : (isNoncoding ? " Noncoding" : "");
+
+ return domain + " (" + currentGene + codingSuffix + ")";
}
/**
{
if (this.interleaved != null && isIt != this.interleaved.booleanValue())
{
- throw new FileFormatException(
- "Parse error: mix of interleaved and noninterleaved detected, at line: "
- + dataLine);
+ throw new FileFormatException("Parse error: interleaved was " + !isIt
+ + " but now seems to be " + isIt + ", at line: " + dataLine);
}
this.interleaved = new Boolean(isIt);
setAlignmentProperty(PROP_INTERLEAVED, interleaved.toString());
{
assertEquals(
"Unexpected exception message",
- "Parse error: mix of interleaved and noninterleaved detected, at line: ABCFEDHIJ",
+ "Parse error: interleaved was true but now seems to be false, at line: ABCFEDHIJ",
e.getMessage());
}
"!Domain=Exon1 Gene=Adh Property=Coding CodonStart=1;\n" +
"#U455 GGGGGG\n" +
"#CPZANT AAAAAA\n\n" +
- "!Domain=Intron1 Gene=Adh;\n" +
+ "!Domain=Intron1 Property=Intron Gene=Adh;\n" +
"#U455 tttttt\n" +
"#CPZANT cccccc\n\n" +
- "!Domain=Exon2 Gene=Adh Property=Coding CodonStart=1;\n" +
+ "!Domain=Exon2 Gene=Adh Property=Exon CodonStart=1;\n" +
"#U455 aaaaaa\n" +
"#CPZANT gggggg\n\n" +
// explicit end of Exon2, implicit end of Adh:
"!Domain=Exon2 Property=domainend;\n" +
+ "!Domain=Intron1 Gene=Opsin Property=Noncoding;\n" +
+ "#U455 GGGGGG\n" +
+ "#CPZANT AAAAAA\n\n" +
+ // end Opsin, start MEF2A
+ "!Domain=Exon1 Gene=MEF2A Property=Coding CodonStart=1;\n" +
+ "#U455 tttttt\n" +
+ "#CPZANT cccccc\n\n" +
+ // end MEF2A
"!Domain=BindingSite;\n" +
"#U455 CCCCCC\n" +
"#CPZANT TTTTTT\n\n";
assertEquals("Expected two sequences", 2, seqs.size());
// check sequence data
assertEquals("First sequence data wrong",
- "CCCCCCGGGGGGttttttaaaaaaCCCCCC", seqs.get(0)
+ "CCCCCCGGGGGGttttttaaaaaaGGGGGGttttttCCCCCC", seqs.get(0)
.getSequenceAsString());
assertEquals("Second sequence data wrong",
- "TTTTTTAAAAAAccccccggggggTTTTTT", seqs.get(1)
+ "TTTTTTAAAAAAccccccggggggAAAAAAccccccTTTTTT", seqs.get(1)
.getSequenceAsString());
/*
SequenceFeature[] sfs = seq.getSequenceFeatures();
// features are added in the order in which their end is found
// (Domain before Gene when they end together)
- assertEquals(5, sfs.length);
+ assertEquals(9, sfs.length);
// TODO settle which way round type/description go!
- assertEquals("Exon1 (Adh)", sfs[0].type);
+ assertEquals("Exon1 (Adh Coding)", sfs[0].type);
assertEquals("Domain", sfs[0].description);
assertEquals(7, sfs[0].begin);
assertEquals(12, sfs[0].end);
- assertEquals("Intron1 (Adh)", sfs[1].type);
+ assertEquals("Intron1 (Adh Noncoding)", sfs[1].type);
assertEquals("Domain", sfs[1].description);
assertEquals(13, sfs[1].begin);
assertEquals(18, sfs[1].end);
- assertEquals("Exon2 (Adh)", sfs[2].type);
+ assertEquals("Exon2 (Adh Coding)", sfs[2].type);
assertEquals("Domain", sfs[2].description);
assertEquals(19, sfs[2].begin);
assertEquals(24, sfs[2].end);
assertEquals("Gene", sfs[3].description);
assertEquals(7, sfs[3].begin);
assertEquals(24, sfs[3].end);
- assertEquals("BindingSite", sfs[4].type);
+ assertEquals("Intron1 (Opsin Noncoding)", sfs[4].type);
assertEquals("Domain", sfs[4].description);
assertEquals(25, sfs[4].begin);
assertEquals(30, sfs[4].end);
+ assertEquals("Opsin", sfs[5].type);
+ assertEquals("Gene", sfs[5].description);
+ assertEquals(25, sfs[5].begin);
+ assertEquals(30, sfs[5].end);
+ assertEquals("Exon1 (MEF2A Coding)", sfs[6].type);
+ assertEquals("Domain", sfs[6].description);
+ assertEquals(31, sfs[6].begin);
+ assertEquals(36, sfs[6].end);
+ assertEquals("MEF2A", sfs[7].type);
+ assertEquals("Gene", sfs[7].description);
+ assertEquals(31, sfs[7].begin);
+ assertEquals(36, sfs[7].end);
+ assertEquals("BindingSite", sfs[8].type);
+ assertEquals("Domain", sfs[8].description);
+ assertEquals(37, sfs[8].begin);
+ assertEquals(42, sfs[8].end);
}
}
}