{
int maxIdLength = getMaxIdLength(s);
int maxSequenceLength = getMaxSequenceLength(s);
- int numLines = maxSequenceLength / positionsPerLine + 3; // approx
- int numDataBlocks = (maxSequenceLength - 1) / positionsPerLine + 1;
int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
int chunksPerLine = (positionsPerLine + spaceEvery - 1) / spaceEvery;
/*
* Roughly size a buffer to hold the whole output
*/
+ int numLines = maxSequenceLength / positionsPerLine + 3; // approx
StringBuilder sb = new StringBuilder(numLines
* (maxIdLength + positionsPerLine + chunksPerLine + 10));
/*
* Output as: #Seqid CGT AGC ACT ... or blocks of 10 for peptide
*/
+ AlignmentAnnotation geneAnnotation = findAnnotation(MEGA_ANNOTATION_GENE);
+ AlignmentAnnotation domainAnnotation = findAnnotation(MEGA_ANNOTATION_DOMAIN);
+ currentGene = null;
+ currentDomain = null;
int from = 0;
- for (int i = 0; i < numDataBlocks; i++)
+
+ while (from < maxSequenceLength)
{
- sb.append(newline);
- boolean first = true;
- int advancedBy = 0;
- for (SequenceI seq : s)
+ printGeneOrDomainHeader(sb, from, geneAnnotation, domainAnnotation);
+ int maxCol = from + positionsPerLine; // exclusive
+ for (int col = from; col <= maxCol; col++)
{
- int seqFrom = from;
- String seqId = String.format("#%-" + maxIdLength + "s",
- seq.getName());
-
- /*
- * output next line for this sequence
- */
- sb.append(seqId);
- int lastPos = seqFrom + positionsPerLine; // exclusive
- for (int j = 0; j < chunksPerLine; j++)
+ if (geneOrDomainChanged(col, geneAnnotation, domainAnnotation)
+ || col == maxCol)
{
- char[] subSequence = seq.getSequence(seqFrom,
- Math.min(lastPos, seqFrom + spaceEvery));
- if (subSequence.length > 0)
- {
- sb.append(SPACE).append(subSequence);
- }
- seqFrom += subSequence.length;
- if (first)
+ /*
+ * print a block of sequences up to [col-1]
+ */
+ sb.append(newline);
+ boolean first = true;
+ int advancedBy = 0;
+ for (SequenceI seq : s)
{
- // all sequences should be the same length in MEGA
- advancedBy += subSequence.length;
+ int seqFrom = from;
+ String seqId = String.format("#%-" + maxIdLength + "s",
+ seq.getName());
+
+ /*
+ * output next line for this sequence
+ */
+ sb.append(seqId);
+ for (int j = 0; j < chunksPerLine; j++)
+ {
+ char[] subSequence = seq.getSequence(seqFrom,
+ Math.min(col, seqFrom + spaceEvery));
+ if (subSequence.length > 0)
+ {
+ sb.append(SPACE).append(subSequence);
+ }
+ seqFrom += subSequence.length;
+ if (first)
+ {
+ // all sequences should be the same length in MEGA
+ advancedBy += subSequence.length;
+ }
+ }
+ // write last position as a comment
+ if (writePositionNumbers)
+ {
+ sb.append(SPACE).append(COMMENT_START)
+ .append(String.valueOf(from + advancedBy))
+ .append(COMMENT_END);
+ }
+ sb.append(newline);
+ first = false;
}
+ sb.append(printLabel(from, advancedBy, maxIdLength));
+ from += advancedBy;
+ break;
}
- // write last position as a comment
- if (writePositionNumbers)
- {
- sb.append(SPACE).append(COMMENT_START).append(from + advancedBy)
- .append(COMMENT_END);
- }
- sb.append(newline);
- first = false;
}
- sb.append(printLabel(from, advancedBy, maxIdLength));
- from += advancedBy;
}
return new String(sb);
}
/**
+ * Returns true if we detect a change of gene or domain at the given column
+ * position. Currently done by inspecting any "MEGA Gene" or "MEGA Domain"
+ * annotation for the column.
+ *
+ * @param column
+ * @param geneAnnotation
+ * @param domainAnnotation
+ * @return
+ */
+ protected boolean geneOrDomainChanged(int column,
+ AlignmentAnnotation geneAnnotation,
+ AlignmentAnnotation domainAnnotation)
+ {
+ String gene = getGeneFromAnnotation(column, geneAnnotation);
+ String domain = getDomainFromAnnotation(column, domainAnnotation);
+ boolean domainEnd = domain != null
+ && domain.toUpperCase().indexOf(DOMAINEND.toUpperCase()) != -1;
+
+ if ((domainEnd || gene == null) && currentGene != null)
+ {
+ return true;
+ }
+ else if (gene != null && !gene.equals(currentGene))
+ {
+ return true;
+ }
+ // todo: Domain
+ return false;
+ }
+
+ /**
+ * Extracts the name of a domain from MEGA Domain annotation at the given
+ * column position, if any
+ *
+ * @param column
+ * @param domainAnnotation
+ * @return
+ */
+ protected static String getDomainFromAnnotation(int column,
+ AlignmentAnnotation domainAnnotation)
+ {
+ Annotation domainAnn = domainAnnotation == null ? null
+ : (column >= domainAnnotation.annotations.length ? null
+ : domainAnnotation.annotations[column]);
+ String domain = domainAnn == null ? null : domainAnn.description;
+ if (domain != null && domain.indexOf("(") > 0)
+ {
+ domain = domain.substring(0, domain.indexOf("(")).trim();
+ }
+ return domain;
+ }
+
+ /**
+ * Extracts the name of a gene from MEGA Gene annotation at the given column
+ * position, if any
+ *
+ * @param column
+ * @param geneAnnotation
+ * @return
+ */
+ protected static String getGeneFromAnnotation(int column,
+ AlignmentAnnotation geneAnnotation)
+ {
+ Annotation geneAnn = geneAnnotation == null ? null
+ : (column >= geneAnnotation.annotations.length ? null
+ : geneAnnotation.annotations[column]);
+ String gene = geneAnn == null ? null : geneAnn.description;
+ return gene;
+ }
+
+ /**
+ * Appends a !Domain header on change of Domain (or Domain/Gene) annotation.
+ * If it changes to a null value, appends a !Domain Property=domainend;
+ * statement
+ *
+ * @param sb
+ * buffer to append to
+ * @param column
+ * of alignment being output
+ * @param geneAnnotation
+ * "MEGA Gene" annotations
+ * @param domainAnnotation
+ * "MEGA Domain" annotations
+ */
+ protected void printGeneOrDomainHeader(StringBuilder sb, int column,
+ AlignmentAnnotation geneAnnotation,
+ AlignmentAnnotation domainAnnotation)
+ {
+ String gene = getGeneFromAnnotation(column, geneAnnotation);
+ String domain = getDomainFromAnnotation(column, domainAnnotation);
+ String property = getPropertyFromAnnotation(column, domainAnnotation);
+
+ if ((gene == null && currentGene != null)
+ || (gene == null && domain == null && currentDomain != null))
+ {
+ /*
+ * end of Gene or Domain annotation
+ */
+ sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
+ .append(currentDomain).append(SPACE).append(PROPERTY)
+ .append(EQUALS).append(DOMAINEND).append(SEMICOLON);
+ }
+ else if (gene != null && !gene.equals(currentGene))
+ {
+ /*
+ * start of a new Gene; output as "!Domain Gene=..." if we have domain
+ * annotation
+ */
+ if (domain != null)
+ {
+ sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
+ .append(domain).append(SPACE).append(GENE).append(EQUALS)
+ .append(gene);
+ if (property != null)
+ {
+ sb.append(SPACE).append(PROPERTY).append(EQUALS).append(property);
+ }
+ sb.append(SEMICOLON);
+ }
+ else
+ {
+ sb.append(newline).append(BANG).append(GENE).append(EQUALS)
+ .append(gene).append(SEMICOLON);
+ }
+ }
+ else if (domain != null && !domain.equals(currentGene))
+ {
+ /*
+ * change of domain within same (or no) gene
+ */
+ sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
+ .append(domain);
+ if (currentGene != null)
+ {
+ sb.append(SPACE).append(GENE).append(EQUALS).append(currentGene);
+ }
+ if (property != null)
+ {
+ sb.append(SPACE).append(PROPERTY).append(EQUALS).append(property);
+ }
+ sb.append(SEMICOLON);
+ }
+ currentGene = gene;
+ currentDomain = domain;
+ }
+
+ /**
* Outputs to string the MEGA header and any other known and relevant
* alignment properties
*
}
/**
- * Get the longest sequence length
+ * Get the longest sequence length (including gaps)
*
* @param s
* @return
this.nucleotide = al.isNucleotide();
/*
- * if the alignment has a "MEGA" annotation, we'll output its values as
- * !Label statements; MEGA only supports one of these
+ * "MEGA Gene", "MEGA Domain" or "MEGA Label" annotations can be output
*/
AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
if (anns != null)
{
+ this.annotations = new Vector<AlignmentAnnotation>();
for (AlignmentAnnotation ann : anns)
{
- if (MEGA_ANNOTATION_LABEL.equals(ann.label))
- {
- this.annotations = new Vector<AlignmentAnnotation>();
- annotations.add(ann);
- break;
- }
+ annotations.add(ann);
}
}
{
this.positionsPerLine = p;
}
+
+ /**
+ * Extracts the Coding / Noncoding property of a domain from MEGA Domain
+ * annotation at the given column position, if present
+ *
+ * @param column
+ * @param domainAnnotation
+ * @return
+ */
+ protected static String getPropertyFromAnnotation(int column,
+ AlignmentAnnotation domainAnnotation)
+ {
+ /*
+ * currently depends on parsing "Exon1 (Aspx Coding)" or similar
+ */
+ String property = null;
+ Annotation domainAnn = domainAnnotation == null ? null
+ : (column >= domainAnnotation.annotations.length ? null
+ : domainAnnotation.annotations[column]);
+ String domain = domainAnn == null ? null : domainAnn.description;
+ if (domain != null && domain.indexOf("(") > 0)
+ {
+ domain = domain.substring(domain.indexOf("(") +1);
+ if (domain.indexOf(SPACE) > -1 && domain.endsWith(")")) {
+ property = domain.substring(domain.indexOf(SPACE) + 1,
+ domain.length() - 1);
+ }
+ }
+ return property;
+ }
}
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
// normally output should match input
// we cheated here with a number of short input lines
// nb don't get Title in output if not calling print(AlignmentI)
- String expected = "#MEGA\n\n" + "#U455 ABCDEF [6]\n"
- + "#CPZANT MNOPQR [6]\n\n" + "#U455 KLMNOP [12]\n"
- + "#CPZANT WXYZGC [12]"
- + "\n";
+ //@formatter:off
+ String expected =
+ "#MEGA\n\n" +
+ "#U455 ABCDEF [6]\n" +
+ "#CPZANT MNOPQR [6]\n\n" +
+ "#U455 KLMNOP [12]\n" +
+ "#CPZANT WXYZGC [12]\n";
+ //@formatter:on
assertEquals("Print format wrong", expected, printed);
}
assertEquals("Roundtrip didn't match", expected,
formatted);
}
+
+ /**
+ * Test (parse and) print of MEGA data with !Gene statements.
+ *
+ * @throws IOException
+ */
+ @Test(groups = { "Functional" })
+ public void testPrint_genes() throws IOException
+ {
+ /*
+ * to keep the test concise, input data is in the exact format that Jalview
+ * would output it; the important thing is functional equivalence of input
+ * and output
+ */
+ //@formatter:off
+ String data = "#MEGA\n\n"+
+ "#Seq1 ABCD [4]\n" +
+ "#Seq2 MNOP [4]\n\n" +
+ "!Domain=Exon1 Gene=Adh Property=Coding;\n" +
+ "#Seq1 EFGHI [9]\n" +
+ "#Seq2 QRSTU [9]\n\n" +
+ "!Domain=Intron1 Gene=Adh Property=Noncoding;\n" +
+ "#Seq1 JK [11]\n" +
+ "#Seq2 VW [11]\n\n" +
+ "!Domain=Intron1 Property=domainend;\n" +
+ "#Seq1 LMN [14]\n" +
+ "#Seq2 XYZ [14]\n";
+ //@formatter:on
+ MegaFile testee = new MegaFile(data, AppletFormatAdapter.PASTE);
+ String printed = testee.print();
+ assertEquals("Print format wrong", data, printed);
+ }
+
+ @Test(groups = { "Functional" })
+ public void testGetDomainFromAnnotation()
+ {
+ Annotation[] anns = new Annotation[5];
+ anns[1] = new Annotation("", "Intron1", '0', 0f);
+ anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
+ anns[3] = new Annotation("", "Intron3 (Aspy Coding)", '0', 0f);
+ anns[4] = new Annotation("", "Intron4 (Coding)", '0', 0f);
+ AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+ // no annotations:
+ assertNull(MegaFile.getDomainFromAnnotation(0, null));
+ // null annotation:
+ assertNull(MegaFile.getDomainFromAnnotation(0, aa));
+ // column out of range:
+ assertNull(MegaFile.getDomainFromAnnotation(5, aa));
+ // domain with no Gene or Property:
+ assertEquals("Intron1", MegaFile.getDomainFromAnnotation(1, aa));
+ // domain with Gene but no Property:
+ assertEquals("Intron2", MegaFile.getDomainFromAnnotation(2, aa));
+ // domain with Gene and Property:
+ assertEquals("Intron3", MegaFile.getDomainFromAnnotation(3, aa));
+ // domain with Property and no Gene:
+ assertEquals("Intron4", MegaFile.getDomainFromAnnotation(4, aa));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testGetGeneFromAnnotation()
+ {
+ Annotation[] anns = new Annotation[3];
+ anns[1] = new Annotation("", "Aspx", '0', 0f);
+ AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+ // no annotations:
+ assertNull(MegaFile.getGeneFromAnnotation(0, null));
+ // null annotation:
+ assertNull(MegaFile.getGeneFromAnnotation(0, aa));
+ // column out of range:
+ assertNull(MegaFile.getGeneFromAnnotation(3, aa));
+ // gene annotation:
+ assertEquals("Aspx", MegaFile.getGeneFromAnnotation(1, aa));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testGetPropertyFromAnnotation()
+ {
+ Annotation[] anns = new Annotation[5];
+ anns[1] = new Annotation("", "Intron1", '0', 0f);
+ anns[2] = new Annotation("", "Intron2 (Aspx)", '0', 0f);
+ anns[3] = new Annotation("", "Intron3 (Aspy Noncoding)", '0', 0f);
+ anns[4] = new Annotation("", "Exon1 (Aspx Coding)", '0', 0f);
+ AlignmentAnnotation aa = new AlignmentAnnotation("", "", anns);
+ // no annotations:
+ assertNull(MegaFile.getPropertyFromAnnotation(0, null));
+ // null annotation:
+ assertNull(MegaFile.getPropertyFromAnnotation(0, aa));
+ // column out of range:
+ assertNull(MegaFile.getPropertyFromAnnotation(5, aa));
+ // domain with no Gene or Property:
+ assertNull(MegaFile.getPropertyFromAnnotation(1, aa));
+ // domain with Gene but no Property:
+ assertNull(MegaFile.getPropertyFromAnnotation(2, aa));
+ // domain with Gene and Property:
+ assertEquals("Noncoding", MegaFile.getPropertyFromAnnotation(3, aa));
+ assertEquals("Coding", MegaFile.getPropertyFromAnnotation(4, aa));
+ }
}