2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.schemes.UserColourScheme;
import jalview.util.Comparison;

import java.awt.Color;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Vector;
43 * A parser for input or output of MEGA format files. <br>
45 * Tamura K, Stecher G, Peterson D, Filipski A, and Kumar S (2013) MEGA6:
46 * Molecular Evolutionary Genetics Analysis Version 6.0. Molecular Biology and
47 * Evolution 30: 2725-2729. <br>
50 * MEGA file format is supported as described in
51 * http://www.megasoftware.net/manual.pdf <br>
54 * <li>any comments (delimited by [ ]) are ignored and not preserved</li>
57 * @see http://www.megasoftware.net/
59 public class MegaFile extends AlignFile
61 private static final String MEGA = "MEGA";
63 private static final String MEGA_ANNOTATION_LABEL = MEGA + " Label";
65 private static final String MEGA_ANNOTATION_GENE = MEGA + " Gene";
67 private static final String MEGA_ANNOTATION_DOMAIN = MEGA + " Domain";
69 private static final char UNDERSCORE = '_';
71 private static final String WHITESPACE = "\\s+";
73 private static final int DEFAULT_LINE_LENGTH = 60;
75 private static final String INDENT = " ";
77 private static final String N_SITES = "NSites";
79 private static final String N_SEQS = "NSeqs";
81 private static final String MISSING = "Missing";
83 private static final String IDENTICAL = "Identical";
85 private static final String INDEL = "Indel";
87 private static final String CODETABLE = "CodeTable";
89 private static final String PROTEIN = "Protein";
91 private static final String NUCLEOTIDE = "Nucleotide";
93 private static final String DATATYPE = "DataType";
95 private static final char COMMENT_START = '[';
97 private static final char COMMENT_END = ']';
99 private static final String HASHSIGN = "#";
101 private static final String SEMICOLON = ";";
103 private static final String BANG = "!";
105 private static final String EQUALS = "=";
107 private static final String MEGA_ID = HASHSIGN + MEGA;
109 private static final String TITLE = "Title";
111 private static final String FORMAT = "Format";
113 private static final String DESCRIPTION = "Description";
115 private static final String GENE = "Gene";
117 private static final String DOMAIN = "Domain";
119 private static final String PROPERTY = "Property";
121 private static final String CODONSTART = "CodonStart";
123 private static final String DOMAINEND = "domainend";
125 private static final String LABEL = "Label";
128 * names of properties to save to the alignment (may affect eventual output
131 static final String PROP_TITLE = "MEGA_TITLE";
133 static final String PROP_INTERLEAVED = "MEGA_INTERLEAVED";
135 static final String PROP_DESCRIPTION = "MEGA_DESCRIPTION";
137 static final String PROP_CODETABLE = "MEGA_CODETABLE";
139 static final String PROP_IDENTITY = "MEGA_IDENTITY";
141 static final String PROP_MISSING = "MEGA_MISSING";
143 static final String PROP_DATATYPE = "MEGA_DATATYPE";
145 // number of bases per line of file (value is inferred)
146 static final String PROP_LINELENGTH = "MEGA_LINELENGTH";
148 // TODO: need a controlled name for Gene as a feature if we want to be able to
149 // output the MEGA file with !Gene headers
150 // WTF do we do if the sequences get realigned?
152 // initial size for sequence data buffer
153 private static final int SEQBUFFERSIZE = 256;
155 private static final String SPACE = " ";
157 private static final String TAB = "\t";
159 private static final char DEFAULT_GAP = '-';
162 * number of sequence positions output per line
164 private int positionsPerLine;
166 private String title;
168 // gap character may be explicitly declared, default is -
169 private char gapCharacter = DEFAULT_GAP;
171 // identity character if declared
172 private char identityCharacter = 0;
174 // this can be True, False or null (meaning not asserted in file)
175 private Boolean nucleotide;
177 // set once we have seen one block of interleaved data
178 private boolean seenAllSequences = false;
180 // this can be True, False or null (meaning we don't know yet)
181 private Boolean interleaved;
183 // write end of line positions as a comment
184 private boolean writePositionNumbers = true;
186 // id of sequence being processed
187 private String currentSequenceId;
190 * Temporary store of {sequenceId, positionData} while parsing interleaved
191 * sequences; sequences are maintained in the order in which they are added
192 * i.e. read in the file
194 Map<String, StringBuilder> seqData;
196 // number of residues read (so far) per sequence
197 Map<String, Integer> residuesRead;
199 // current Gene if any we are parsing
200 private String currentGene;
202 // start position in alignment (base 0) of current Gene
203 private int currentGeneStartCol;
205 // start residue (base 1) per sequence of current Gene
206 private Map<String, Integer> geneStart;
208 // current Domain if any we are parsing
209 private String currentDomain;
211 // start position in alignment (base 0) of current Domain
212 private int currentDomainStartCol;
214 // start residue (base 1) per sequence of current Domain
215 private Map<String, Integer> domainStart;
217 // map of SequenceFeature's by sequence id
218 private Map<String, List<SequenceFeature>> sequenceFeatures;
220 // each !Label line character becomes an Annotation (except underscores)
221 List<Annotation> labelAnnotations;
223 // records any declared Gene positions (including null values)
224 List<Annotation> geneAnnotations;
226 // records any declared Domain positions (including null values)
227 List<Annotation> domainAnnotations;
// Constructor taking an input file reference and a type string.
// NOTE(review): the constructor body (original lines 234-236) is missing from
// this listing; presumably it delegates to the matching AlignFile superclass
// constructor - confirm against the full source.
233 public MegaFile(String inFile, String type) throws IOException
// Constructor taking an already opened FileParse source.
// NOTE(review): the constructor body (original lines 239-241) is missing from
// this listing; presumably it delegates to the matching AlignFile superclass
// constructor - confirm against the full source.
238 public MegaFile(FileParse source) throws IOException
244 * Parse the input stream.
// Entry point for reading a MEGA file: resets the parse state, consumes the
// header lines, then dispatches every following non-comment line to the
// Gene/Domain, Label or sequence-data handlers before converting the collected
// data to sequences and features.
// NOTE(review): this listing has dropped the brace-only lines and some short
// statements of this method (original lines 287, 296, 305 and 324-325 among
// them); restore them from the full source before compiling.
247 public void parse() throws IOException
// start every parse from a clean state
249 gapCharacter = DEFAULT_GAP;
250 sequenceFeatures = new HashMap<String, List<SequenceFeature>>();
251 geneStart = new HashMap<String, Integer>();
252 domainStart = new HashMap<String, Integer>();
253 residuesRead = new HashMap<String, Integer>();
254 labelAnnotations = new ArrayList<Annotation>();
255 geneAnnotations = new ArrayList<Annotation>();
256 domainAnnotations = new ArrayList<Annotation>();
// -1 marks 'no gene / domain currently open' (see extendAnnotation)
257 currentDomainStartCol = -1;
258 currentGeneStartCol = -1;
261 * Read and process MEGA and Title/Format/Description headers if present.
262 * Returns the first data line following the headers.
264 String dataLine = parseHeaderLines();
267 * order-preserving map to hold sequences by id as they are built up during
270 seqData = new LinkedHashMap<String, StringBuilder>();
273 * The id of the sequence being read (for non-interleaved)
275 currentSequenceId = "";
// main loop: one iteration per non-comment line until end of file
277 while (dataLine != null)
279 dataLine = dataLine.trim();
280 if (dataLine.length() > 0)
// tabs are treated as spaces by all the line parsers below
282 dataLine = dataLine.replace(TAB, SPACE);
// statement keywords are matched without regard to case
283 String upperCased = dataLine.toUpperCase();
284 if (upperCased.startsWith(BANG + GENE.toUpperCase())
285 || upperCased.startsWith(BANG + DOMAIN.toUpperCase()))
288 parseGeneOrDomain(dataLine);
290 else if (upperCased.startsWith(BANG + LABEL.toUpperCase()))
292 parseLabel(dataLine);
// anything else is handed to parseDataLine, which returns the id of the
// sequence the line belongs to
297 currentSequenceId = parseDataLine(dataLine);
300 else if (!seqData.isEmpty())
303 * Blank line after processing some data...
// NOTE(review): the statement executed for this branch (original line 305) is
// missing from the listing; endDataBlock() looks like the intended call -
// confirm against the full source.
307 dataLine = nextNonCommentLine();
311 * close off any features currently being parsed
313 createFeature(GENE, currentGene, geneStart);
314 createFeature(DOMAIN, currentDomain, domainStart);
316 extendAnnotation(geneAnnotations, currentGene, currentGeneStartCol);
317 extendAnnotation(domainAnnotations, currentDomain,
318 currentDomainStartCol);
320 // remember the (longest) line length read in, so we can output the same
321 setAlignmentProperty(PROP_LINELENGTH, String.valueOf(positionsPerLine));
323 deriveSequencesAndFeatures();
329 * Create AlignmentAnnotation for Label, Gene and Domain (provided at least
330 * one non-null annotation is present)
332 protected void deriveAnnotations()
334 deriveAnnotation(this.labelAnnotations, MEGA_ANNOTATION_LABEL);
335 deriveAnnotation(this.geneAnnotations, MEGA_ANNOTATION_GENE);
336 deriveAnnotation(this.domainAnnotations, MEGA_ANNOTATION_DOMAIN);
340 * Create and ad an AlignmentAnnotation (provided at least one non-null
341 * annotation is present)
346 protected void deriveAnnotation(List<Annotation> anns, String label)
348 if (anns.size() > 0 && hasNonNullEntry(anns))
350 Annotation[] annotationArray = anns.toArray(new Annotation[anns
352 AlignmentAnnotation aa = new AlignmentAnnotation(label, "",
354 this.annotations.add(aa);
358 protected static boolean hasNonNullEntry(List<? extends Object> l)
371 * Parse a !Label line. This contains a single character per position (column)
372 * of the alignment block above. An underscore character represents no label.
373 * Labels are assembled into an AlignmentAnnotation object.
376 * @return true if any non-null annotation was created
377 * @throws FileFormatException
379 protected boolean parseLabel(String dataLine) throws FileFormatException
381 // strip off leading !Label and following spaces
382 dataLine = dataLine.substring(LABEL.length() + 1).trim();
384 // remove internal spacing and any leading tab
385 String labels = dataLine.replace(SPACE, "");
386 if (labels.endsWith(SEMICOLON))
388 labels = labels.substring(0, labels.length() - 1);
392 System.err.println("Warning: '" + dataLine
393 + "' should end with semi-colon");
395 boolean added = false;
396 for (char c : labels.toCharArray())
400 this.labelAnnotations.add(null);
404 this.labelAnnotations.add(new Annotation(String.valueOf(c), "",
413 * Post-processing after reading one block of interleaved data
415 protected void endDataBlock()
417 padAnnotations(labelAnnotations);
421 * Append null annotations to keep the annotations list the same length as the
422 * sequences. This ensures that when the list is converted to an array it is
423 * correctly aligned with the alignment columns. It is needed when there are
424 * gaps in declared 'annotations' in a MEGA file, such as lines with no !Label
425 * statement, or regions between marked genes or domains.
429 protected void padAnnotations(List<Annotation> anns)
431 addNullAnnotations(anns, getAlignmentWidth());
435 * Append null annotations for positions up to (and excluding) the given end
441 protected void addNullAnnotations(List<Annotation> anns, int upTo)
443 int annotationsToAdd = upTo - anns.size();
444 for (int i = 0; i < annotationsToAdd; i++)
451 * Parse a !Gene or !Domain command line. MEGA accepts
453 * <li>!Gene=name;</li>
454 * <li>!Gene=name Property=Coding/Noncoding CodonStart=1/2/3;</li>
455 * <li>!Gene=genename Domain=domainname Property= etc</li>
456 * <li>!Domain=domainname Gene=genename Property= etc</li>
457 * <li>!Domain=domainname Property= etc</li>
458 * <li>!domain=domainname property=domainend</li>
460 * Properly, a Gene should be composed of Domain segments, but MEGA accepts
461 * without. Note that keywords don't seem to be case sensitive.
464 * @throws FileFormatException
466 protected void parseGeneOrDomain(String dataLine)
467 throws FileFormatException
469 String domain = null;
471 String property = null;
472 String codonStart = null;
473 String errorMsg = "Unrecognized format: " + dataLine;
475 if (!dataLine.startsWith(BANG) || !dataLine.endsWith(SEMICOLON))
477 throw new FileFormatException(errorMsg);
479 String trimmed = dataLine.substring(1, dataLine.length() - 1).trim();
480 String[] tokens = trimmed.split(WHITESPACE);
481 for (String token : tokens)
483 String[] keyValue = token.split("=");
484 if (keyValue.length != 2)
486 throw new FileFormatException(errorMsg);
488 String key = keyValue[0];
489 if (GENE.equalsIgnoreCase(key))
493 else if (DOMAIN.equalsIgnoreCase(key))
495 domain = keyValue[1];
497 else if (PROPERTY.equalsIgnoreCase(key))
499 property = keyValue[1];
501 else if (CODONSTART.equalsIgnoreCase(key))
503 codonStart = keyValue[1];
507 System.err.println("Unrecognised token: '" + key + "; in "
512 processGeneOrDomain(gene, domain, property, codonStart);
516 * Process a statement containing one or both of Gene and Domain, and
517 * optionally Property or CodonStart commands.
520 * the Gene name if specified, else null
522 * the Domain name if specified, else null
524 * the Property value if specified, else null
526 * the CodonStart value if specified, else null
528 protected void processGeneOrDomain(String gene, String domain,
529 String property, String codonStart)
532 * the order of processing below ensures that we correctly handle a domain
533 * in the context of an enclosing gene
535 processDomainEnd(domain, gene, property);
537 processGeneEnd(gene);
539 processGeneStart(gene);
541 processDomainStart(domain, property);
543 // TODO save codonStart if we plan to involve it in 'translate as cDNA'
547 * If we have declared a domain, and it is not continuing, start a sequence
553 protected void processDomainStart(String domain, String property)
555 if (DOMAINEND.equalsIgnoreCase(property))
557 currentDomain = null;
561 if (domain != null && !domain.equals(currentDomain))
563 String verboseDomain = makeVerboseDomainName(domain, property);
564 startSequenceFeature(domainStart);
565 currentDomainStartCol = getAlignmentWidth();
567 currentDomain = verboseDomain;
572 * Returns the width of alignment parsed so far. Note we assume (as does MEGA)
573 * that all sequences are the same length, so we can just take the length of
578 protected int getAlignmentWidth()
580 return seqData.isEmpty() ? 0 : seqData.values()
581 .iterator().next().length();
585 * If we have declared a gene, and it is not continuing, start a sequence
590 protected void processGeneStart(String gene)
592 if (gene != null && !gene.equals(currentGene))
594 startSequenceFeature(geneStart);
595 currentGeneStartCol = getAlignmentWidth();
601 * If we have been processing a domain, and it is not being continued, then
602 * make a sequence feature for the domain just ended. Criteria for the domain
603 * not being continued are either an explicit new domain or gene name, or a
604 * 'Property=domainend' statement
609 * @return true if a feature is created, else false
611 protected boolean processDomainEnd(String domain, String gene,
614 boolean newGene = (gene != null && !gene.equals(currentGene));
616 String verboseDomain = makeVerboseDomainName(domain, property);
618 if (this.currentDomain != null)
620 boolean newDomain = !this.currentDomain.equals(verboseDomain);
621 boolean domainEnded = DOMAINEND.equalsIgnoreCase(property);
622 if (newDomain || newGene || domainEnded)
624 createFeature(DOMAIN, currentDomain, domainStart);
625 // and/or... create annnotations for domain
626 extendAnnotation(domainAnnotations, currentDomain,
627 currentDomainStartCol);
628 currentDomain = null;
629 currentDomainStartCol = -1;
637 * If we have been processing a gene, and it is not being continued, then make
638 * a sequence feature for the gene just ended
641 * @return true if a feature is created, else false
643 protected boolean processGeneEnd(String gene)
645 boolean created = false;
647 * If we were processing a gene and now have either another, or none, create
648 * a sequence feature for that gene
650 if (this.currentGene != null && !this.currentGene.equals(gene))
652 createFeature(GENE, currentGene, geneStart);
653 // and/or... add annotations for Gene
654 extendAnnotation(geneAnnotations, currentGene, currentGeneStartCol);
656 currentGeneStartCol = -1;
664 * Helper method to add Annotation elements, with the given description and
665 * starting at the given start column, up to the end of the sequence length
666 * parsed so far. Null elements are inserted for any skipped columns since the
667 * last annotation (if any), i.e. positions with no annotation of this type.
672 * the start column of the annotations to add, or -1 if nothing to
675 protected void extendAnnotation(List<Annotation> anns,
676 String description, int startColumn)
678 int alignmentWidth = getAlignmentWidth();
679 addNullAnnotations(anns, startColumn == -1 ? alignmentWidth
682 int numberToAdd = alignmentWidth - anns.size();
685 Color col = description == null ? Color.black : UserColourScheme
686 .createColourFromName(description);
687 for (int i = 0; i < numberToAdd; i++)
689 anns.add(new Annotation("X", description, ' ', 0f, col));
695 * Makes an expanded descriptive name for Domain if possible e.g.
696 * "Intron1 (Adh Coding)". Currently incorporates the current gene name (if
697 * any) and the Coding/Noncoding property value (if given).
703 protected String makeVerboseDomainName(String domain, String property)
705 String verboseDomain = domain;
709 if ("Exon".equalsIgnoreCase(property)
710 || "Coding".equalsIgnoreCase(property))
714 else if ("Intron".equalsIgnoreCase(property)
715 || "Noncoding".equalsIgnoreCase(property))
717 coding = " Noncoding";
719 verboseDomain = domain
720 + (currentGene == null ? "" : " (" + currentGene + coding
723 return verboseDomain;
727 * Start processing a new feature
729 * @param startPositions
731 protected void startSequenceFeature(Map<String, Integer> startPositions)
734 * If the feature declaration precedes all sequences, we will know in
735 * createFeature that it started with residue 1; otherwise note now where it
736 * starts in each sequence
738 if (!residuesRead.isEmpty())
740 for (Entry<String, Integer> entry : residuesRead.entrySet())
742 String seqId = entry.getKey();
743 Integer nextResidue = entry.getValue() + 1;
744 startPositions.put(seqId, nextResidue);
750 * Add a SequenceFeature to each sequence, using the given start/end values
754 * @param featureValue
755 * @param featureStartResidues
757 protected void createFeature(String featureType, String featureValue,
758 Map<String, Integer> featureStartResidues)
760 if (featureValue == null)
765 Iterator<String> seqids = this.seqData.keySet().iterator();
766 while (seqids.hasNext())
768 String seqid = seqids.next();
769 Integer startAt = featureStartResidues.get(seqid);
770 int sfstart = startAt == null ? 1 : startAt.intValue();
771 int sfend = residuesRead.get(seqid);
772 if (sfend >= sfstart)
775 * don't add feature if entirely gapped in the sequence
777 // NB use 'colour feature by label' to show up distinct instances of
778 // feature type 'Gene' or 'Domain' on the alignment
779 SequenceFeature sf = new SequenceFeature(featureType, featureValue,
780 sfstart, sfend, 0f, null);
781 sequenceFeatures.get(seqid).add(sf);
787 * Returns the next line that is not a comment, or null at end of file.
788 * Comments in MEGA are within [ ] brackets, and may be nested.
791 * @throws FileFormatException
793 protected String nextNonCommentLine() throws FileFormatException
795 return nextNonCommentLine(0);
799 * Returns the next non-comment line (or part line), or null at end of file.
800 * Comments in MEGA are within [ ] brackets, and may be nested. They may occur
801 * anywhere within a line (for example at the end with position numbers); this
802 * method returns the line with any comments removed.
805 * current depth of nesting of comments while parsing
807 * @throws FileFormatException
// Reads the next line and strips comments from it, recursing while a comment
// opened on this or an earlier line is still open at the end of the line.
// 'depth' is the comment nesting depth carried over from preceding lines.
// NOTE(review): this listing has dropped the brace-only lines and the short
// statements of this method (original lines 811-815, 819-823, 825-828, 834
// and 837-839), including the declaration of 'data' and the call that reads
// the next raw line; restore them from the full source before compiling.
809 protected String nextNonCommentLine(final int depth)
810 throws FileFormatException
// an I/O failure while reading is reported as a file format problem; only the
// message of the original exception is kept
816 } catch (IOException e)
818 throw new FileFormatException(e.getMessage());
// NOTE(review): presumably reached when input ends while a comment is still
// open - the guarding condition is on a missing line, confirm
824 System.err.println("Warning: unterminated comment in data file");
830 * If we are in a (possibly nested) comment after parsing this line, keep
831 * reading recursively until the comment has unwound
833 int newDepth = commentDepth(data, depth);
// one recursion level per line read inside a multi-line comment
836 return nextNonCommentLine(newDepth);
841 * not in a comment by end of this line; return what is left
843 String nonCommentPart = getNonCommentContent(data, depth);
844 return nonCommentPart;
849 * Returns what is left of the input data after removing any comments, whether
850 * 'in progress' from preceding lines, or embedded in the current line
855 * nested depth of comments pending termination
857 * @throws FileFormatException
859 protected static String getNonCommentContent(String data, int depth)
860 throws FileFormatException
862 int len = data.length();
863 StringBuilder result = new StringBuilder(len);
864 for (int i = 0; i < len; i++)
866 char c = data.charAt(i);
891 return result.toString();
895 * Calculates new depth of comment after parsing an input line i.e. the excess
896 * of opening '[' over closing ']' characters. Any excess ']' are ignored (not
897 * treated as comment delimiters).
902 * current comment nested depth before parsing the line
903 * @return new depth after parsing the line
905 protected static int commentDepth(CharSequence data, int depth)
907 int newDepth = depth;
908 int len = data.length();
909 for (int i = 0; i < len; i++)
911 char c = data.charAt(i);
912 if (c == COMMENT_START)
916 else if (c == COMMENT_END && newDepth > 0)
925 * Convert the parsed sequence strings to objects and store them in the model.
927 protected void deriveSequencesAndFeatures()
929 Set<Entry<String, StringBuilder>> datasets = seqData.entrySet();
931 for (Entry<String, StringBuilder> dataset : datasets)
933 String sequenceId = dataset.getKey();
934 StringBuilder characters = dataset.getValue();
935 SequenceI s = new Sequence(sequenceId, new String(characters));
936 this.seqs.addElement(s);
939 * and add any derived sequence features to the sequence
941 for (SequenceFeature sf : sequenceFeatures.get(sequenceId))
943 s.addSequenceFeature(sf);
949 * Process one line of sequence data. If it has no sequence identifier, append
950 * to the current id's sequence. Else parse out the sequence id and append the
951 * data (if any) to that id's sequence. Returns the sequence id (implicit or
952 * explicit) for this line.
956 * @throws FileFormatException
958 protected String parseDataLine(String dataLine)
959 throws FileFormatException
961 String seqId = getSequenceId(dataLine);
965 * Just character data
967 parseNoninterleavedDataLine(dataLine);
968 return currentSequenceId;
970 else if ((HASHSIGN + seqId).trim().equals(dataLine.trim()))
973 * Sequence id only - header line for noninterleaved data
980 * Sequence id followed by data
982 parseInterleavedDataLine(dataLine, seqId);
988 * Add a line of sequence data to the buffer for the given sequence id. Start
989 * a new one if we haven't seen it before.
992 * @throws FileFormatException
994 protected void parseNoninterleavedDataLine(String dataLine)
995 throws FileFormatException
997 if (currentSequenceId == null)
1000 * Oops. Data but no sequence id context.
1002 throw new FileFormatException("No sequence id context at: "
1006 assertInterleaved(false, dataLine);
1008 dataLine = addSequenceData(currentSequenceId, dataLine);
1010 setPositionsPerLine(Math.max(positionsPerLine, dataLine.length()));
1014 * Get the sequence data for this sequence id, starting a new one if
1020 protected StringBuilder getSequenceDataBuffer(String currentId)
1022 StringBuilder sb = seqData.get(currentId);
1025 // first data met for this sequence id, start a new buffer
1026 sb = new StringBuilder(SEQBUFFERSIZE);
1027 seqData.put(currentId, sb);
1029 // and a placeholder for any SequenceFeature found
1030 sequenceFeatures.put(currentId, new ArrayList<SequenceFeature>());
1035 * we are appending to a previously seen sequence; flag that we have seen
1038 this.seenAllSequences = true;
1044 * Parse one line of interleaved data e.g.
1047 * #TheSeqId CGATCGCATGCA
1052 * @throws FileFormatException
1054 protected void parseInterleavedDataLine(String dataLine, String seqId)
1055 throws FileFormatException
1058 * New sequence found in second or later data block - error.
1060 if (this.seenAllSequences && !seqData.containsKey(seqId))
1062 throw new FileFormatException(
1063 "Parse error: misplaced new sequence starting at " + dataLine);
1066 String data = dataLine.substring(seqId.length() + 1).trim();
1069 * Do nothing if this line is _only_ a sequence id with no data following.
1071 if (data != null && data.length() > 0)
1073 data = addSequenceData(seqId, data);
1074 setPositionsPerLine(Math.max(positionsPerLine, data.length()));
1075 assertInterleaved(true, dataLine);
1080 * Remove spaces, and replace identity symbol, before appending the sequence
1081 * data to the buffer for the sequence id. Returns the reformatted added data.
1082 * Also updates a count of residues read for the sequence.
// Normalises one chunk of sequence data for the given sequence id (spaces
// removed, declared gap and identity characters translated) and updates the
// count of residues read for that sequence.
// NOTE(review): this listing has dropped the brace-only lines and several
// short statements of this method (original lines 1098, 1110, 1123-1126,
// 1130-1131 and 1135-1137), including the declaration and update of
// 'nonGapped', the append to the sequence buffer and the return statement;
// restore them from the full source before compiling.
1088 protected String addSequenceData(String seqId, String data)
1090 StringBuilder sb = getSequenceDataBuffer(seqId);
// length before this chunk, i.e. the column offset of the chunk's first
// character
1091 int len = sb.length();
1092 String formatted = data.replace(SPACE, "");
1095 * If sequence contains '.' or other identity symbol; replace these with the
1096 * same position from the first (reference) sequence
// the reference is the first buffer in insertion order
1099 StringBuilder referenceSequence = seqData.values().iterator().next();
1100 StringBuilder sb1 = new StringBuilder(formatted.length());
1101 for (int i = 0; i < formatted.length(); i++)
1103 char nextChar = formatted.charAt(i);
1104 if (nextChar == gapCharacter)
// a declared gap symbol that Comparison.isGap rejects is replaced by '-'
1106 sb1.append(Comparison.isGap(nextChar) ? nextChar : DEFAULT_GAP);
// the bounds test leaves an identity symbol unchanged where the reference has
// no character at that column
1111 if (nextChar == identityCharacter
1112 && len + i < referenceSequence.length())
1114 sb1.append(referenceSequence.charAt(len + i));
1118 sb1.append(nextChar);
1122 formatted = sb1.toString();
1128 * increment residue count for the sequence
// a sequence with no count yet starts from zero
1132 Integer residueCount = residuesRead.get(seqId);
1133 residuesRead.put(seqId, nonGapped
1134 + (residueCount == null ? 0 : residueCount));
1141 * If the line begins with (e.g.) "#abcde " then returns "abcde" as the
1142 * identifier. Else returns null.
1147 public static String getSequenceId(String dataLine)
1149 // TODO refactor to a StringUtils type class
1150 if (dataLine != null)
1152 if (dataLine.startsWith(HASHSIGN))
1154 int spacePos = dataLine.indexOf(" ");
1155 return (spacePos == -1 ? dataLine.substring(1) : dataLine
1156 .substring(1, spacePos));
1163 * Read the #MEGA and Title/Format/Description header lines (if present).
1165 * Save as alignment properties in case useful.
1167 * @return the next non-blank line following the header lines.
1168 * @throws FileFormatException
// Reads lines from the start of the file, handling the #MEGA, Title,
// Description and Format header statements.
// NOTE(review): this listing has dropped the brace-only lines and the short
// control statements of this method (original lines 1176-1179, 1181-1184,
// 1186-1189 and 1205-1215), including whatever follows the blank-line and
// #MEGA tests and the final return; restore them from the full source before
// compiling.
1170 protected String parseHeaderLines() throws FileFormatException
1172 String inputLine = null;
1173 while ((inputLine = nextNonCommentLine()) != null)
1175 inputLine = inputLine.trim();
1180 if (inputLine.length() == 0)
// the #MEGA marker is matched without regard to case
1185 if (inputLine.toUpperCase().startsWith(MEGA_ID))
1190 if (isTitle(inputLine))
1192 this.title = getValue(inputLine);
1193 setAlignmentProperty(PROP_TITLE, title);
// NOTE(review): unlike #MEGA and Title, the Description and Format keywords
// are matched case-sensitively here - confirm this is intended
1195 else if (inputLine.startsWith(BANG + DESCRIPTION))
1197 parseDescription(inputLine);
1200 else if (inputLine.startsWith(BANG + FORMAT))
1202 parseFormat(inputLine);
1204 else if (!inputLine.toUpperCase().startsWith(MEGA_ID))
1208 * Return the first 'data line' i.e. one that is not blank, #MEGA or
1218 * Parse a !Format statement. This may be multiline, and is ended by a
1222 * @throws FileFormatException
1224 protected void parseFormat(String inputLine) throws FileFormatException
1226 while (inputLine != null)
1228 parseFormatLine(inputLine);
1229 if (inputLine.endsWith(SEMICOLON))
1233 inputLine = nextNonCommentLine();
1238 * Parse one line of a !Format statement. This may contain one or more
1239 * keyword=value pairs.
1242 * @throws FileFormatException
1244 protected void parseFormatLine(String inputLine)
1245 throws FileFormatException
1247 if (inputLine.startsWith(BANG + FORMAT))
1249 inputLine = inputLine.substring((BANG + FORMAT).length());
1251 if (inputLine.endsWith(SEMICOLON))
1253 inputLine = inputLine.substring(0, inputLine.length() - 1);
1255 if (inputLine.length() == 0)
1259 String[] tokens = inputLine.trim().split(WHITESPACE);
1260 for (String token : tokens)
1262 parseFormatKeyword(token);
1267 * Parse a Keyword=Value token. Possible keywords are
1269 * <li>DataType= DNA, RNA, Nucleotide, Protein</li>
1270 * <li>DataFormat= Interleaved, ?</li>
1271 * <li>NSeqs= number of sequences (synonym NTaxa)</li>
1272 * <li>NSites= number of bases / residues</li>
1273 * <li>Property= Exon (or Coding), Intron (or Noncoding), End (of domain)</li>
1274 * <li>Indel= gap character</li>
1275 * <li>Identical= identity character (to first sequence) (synonym MatchChar)</li>
1276 * <li>Missing= missing data character</li>
1277 * <li>CodeTable= Standard, other (MEGA supports various)</li>
1281 * @throws FileFormatException
1282 * if an unrecognised keyword or value is encountered
1284 protected void parseFormatKeyword(String token)
1285 throws FileFormatException
1287 String msg = "Unrecognised Format command: " + token;
1288 String[] bits = token.split(EQUALS);
1289 if (bits.length != 2)
1291 throw new FileFormatException(msg);
1293 String keyword = bits[0];
1294 String value = bits[1];
1297 * Jalview will work out whether nucleotide or not anyway
1299 if (keyword.equalsIgnoreCase(DATATYPE))
1301 if (value.equalsIgnoreCase("DNA") || value.equalsIgnoreCase("RNA")
1302 || value.equalsIgnoreCase("Nucleotide"))
1304 this.nucleotide = true;
1305 // alignment computes whether or not it is nucleotide when created
1307 else if (value.equalsIgnoreCase(PROTEIN))
1309 this.nucleotide = false;
1313 throw new FileFormatException(msg);
1315 setAlignmentProperty(PROP_DATATYPE, value);
1319 * accept non-Standard code table but save in case we want to disable
1320 * 'translate as cDNA'
1322 else if (keyword.equalsIgnoreCase(CODETABLE))
1324 setAlignmentProperty(PROP_CODETABLE, value);
1328 * save gap char to set later on alignment once created
1330 else if (keyword.equalsIgnoreCase(INDEL))
1332 this.gapCharacter = value.charAt(0);
1333 if (!Comparison.isGap(gapCharacter))
1335 System.err.println("Jalview doesn't support '" + gapCharacter
1336 + "' for gaps, will be converted to '" + DEFAULT_GAP + "'");
1340 else if (keyword.equalsIgnoreCase(IDENTICAL)
1341 || keyword.equalsIgnoreCase("MatchChar"))
1343 setAlignmentProperty(PROP_IDENTITY, value);
1344 this.identityCharacter = value.charAt(0);
1345 if (!".".equals(value))
1347 System.err.println("Warning: " + token
1348 + " not supported, Jalview uses '.' for identity");
1352 else if (keyword.equalsIgnoreCase(MISSING))
1354 setAlignmentProperty(PROP_MISSING, value);
1355 System.err.println("Warning: " + token + " not supported");
1358 else if (keyword.equalsIgnoreCase(PROPERTY))
1360 // TODO: can Property appear in a Format command?
1361 // suspect this is a mistake in the manual
1364 else if (!keyword.equalsIgnoreCase(N_SEQS)
1365 && !keyword.equalsIgnoreCase("NTaxa")
1366 && !keyword.equalsIgnoreCase(N_SITES))
1368 System.err.println("Warning: " + msg);
1373 * Returns the trimmed data on the line following either whitespace or '=',
1374 * with any trailing semi-colon removed<br>
1377 * <li>Hello World</li>
1378 * <li>!Hello: \tWorld;</li>
1379 * <li>!Hello=World</li>
1381 * should all return "World"
1386 protected static String getValue(String inputLine)
1388 if (inputLine == null)
1392 String value = null;
1393 String s = inputLine.replaceAll("\t", " ").trim();
1396 * KEYWORD = VALUE should return VALUE
1398 int equalsPos = s.indexOf("=");
1401 value = s.substring(equalsPos + 1);
1405 int spacePos = s.indexOf(' ');
1406 value = spacePos == -1 ? "" : s.substring(spacePos + 1);
1408 value = value.trim();
1409 if (value.endsWith(SEMICOLON))
1411 value = value.substring(0, value.length() - 1).trim();
1417 * Returns true if the input line starts with "TITLE" or "!TITLE" (not case
1418 * sensitive). The latter is the official format, some older data file
1419 * examples have it without the !.
1424 protected static boolean isTitle(String inputLine)
1426 if (inputLine == null)
1430 String upper = inputLine.toUpperCase();
1431 return (upper.startsWith(TITLE.toUpperCase()) || upper.startsWith(BANG
1432 + TITLE.toUpperCase()));
1436 * Reads lines until terminated by semicolon, appending each to the
1437 * Description property value.
1439 * @throws FileFormatException
1441 protected void parseDescription(String firstDescriptionLine)
1442 throws FileFormatException
1444 StringBuilder desc = new StringBuilder(256);
1445 desc.append(getValue(firstDescriptionLine));
1446 if (!firstDescriptionLine.endsWith(SEMICOLON))
1448 String line = nextNonCommentLine();
1449 while (line != null)
1451 if (line.endsWith(SEMICOLON))
1453 desc.append(line.substring(0, line.length() - 1));
1456 else if (line.length() > 0)
1458 desc.append(line).append(newline);
1460 line = nextNonCommentLine();
1463 setAlignmentProperty(PROP_DESCRIPTION, desc.toString());
1467 * Returns the alignment sequences in Mega format.
1470 public String print()
1472 return MEGA_ID + newline + print(getSeqsAsArray());
1476 * Write out the alignment sequences in Mega format - interleaved unless
1477 * explicitly noninterleaved.
1479 protected String print(SequenceI[] s)
1482 if (this.interleaved != null && !this.interleaved)
1484 result = printNonInterleaved(s);
1488 result = printInterleaved(s);
1494 * Print to string in Interleaved format - blocks of next N characters of each
1499 protected String printInterleaved(SequenceI[] s)
1501 int maxIdLength = getMaxIdLength(s);
1502 int maxSequenceLength = getMaxSequenceLength(s);
1504 int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
1505 int chunksPerLine = (positionsPerLine + spaceEvery - 1) / spaceEvery;
1508 * Roughly size a buffer to hold the whole output
1510 int numLines = maxSequenceLength / positionsPerLine + 3; // approx
1511 StringBuilder sb = new StringBuilder(numLines
1512 * (maxIdLength + positionsPerLine + chunksPerLine + 10));
1515 * Output as: #Seqid CGT AGC ACT ... or blocks of 10 for peptide
1517 AlignmentAnnotation geneAnnotation = findAnnotation(MEGA_ANNOTATION_GENE);
1518 AlignmentAnnotation domainAnnotation = findAnnotation(MEGA_ANNOTATION_DOMAIN);
1520 currentDomain = null;
1523 while (from < maxSequenceLength)
1525 printGeneOrDomainHeader(sb, from, geneAnnotation, domainAnnotation);
1526 int maxCol = from + positionsPerLine; // exclusive
1527 for (int col = from; col <= maxCol; col++)
1529 if (geneOrDomainChanged(col, geneAnnotation, domainAnnotation)
1533 * print a block of sequences up to [col-1]
1536 boolean first = true;
1538 for (SequenceI seq : s)
1541 String seqId = String.format("#%-" + maxIdLength + "s",
1545 * output next line for this sequence
1548 for (int j = 0; j < chunksPerLine; j++)
1550 char[] subSequence = seq.getSequence(seqFrom,
1551 Math.min(col, seqFrom + spaceEvery));
1552 if (subSequence.length > 0)
1554 sb.append(SPACE).append(subSequence);
1556 seqFrom += subSequence.length;
1559 // all sequences should be the same length in MEGA
1560 advancedBy += subSequence.length;
1563 // write last position as a comment
1564 if (writePositionNumbers)
1566 sb.append(SPACE).append(COMMENT_START)
1567 .append(String.valueOf(from + advancedBy))
1568 .append(COMMENT_END);
1573 sb.append(printLabel(from, advancedBy, maxIdLength));
1580 return new String(sb);
1584 * Returns true if we detect a change of gene or domain at the given column
1585 * position. Currently done by inspecting any "MEGA Gene" or "MEGA Domain"
1586 * annotation for the column.
1589 * @param geneAnnotation
1590 * @param domainAnnotation
1593 protected boolean geneOrDomainChanged(int column,
1594 AlignmentAnnotation geneAnnotation,
1595 AlignmentAnnotation domainAnnotation)
1597 String gene = getGeneFromAnnotation(column, geneAnnotation);
1598 String domain = getDomainFromAnnotation(column, domainAnnotation);
1599 boolean domainEnd = domain != null
1600 && domain.toUpperCase().indexOf(DOMAINEND.toUpperCase()) != -1;
1602 if ((domainEnd || gene == null) && currentGene != null)
1606 else if (gene != null && !gene.equals(currentGene))
1615 * Extracts the name of a domain from MEGA Domain annotation at the given
1616 * column position, if any
1619 * @param domainAnnotation
1622 protected static String getDomainFromAnnotation(int column,
1623 AlignmentAnnotation domainAnnotation)
1625 Annotation domainAnn = domainAnnotation == null ? null
1626 : (column >= domainAnnotation.annotations.length ? null
1627 : domainAnnotation.annotations[column]);
1628 String domain = domainAnn == null ? null : domainAnn.description;
1629 if (domain != null && domain.indexOf("(") > 0)
1631 domain = domain.substring(0, domain.indexOf("(")).trim();
1637 * Extracts the name of a gene from MEGA Gene annotation at the given column
1641 * @param geneAnnotation
1644 protected static String getGeneFromAnnotation(int column,
1645 AlignmentAnnotation geneAnnotation)
1647 Annotation geneAnn = geneAnnotation == null ? null
1648 : (column >= geneAnnotation.annotations.length ? null
1649 : geneAnnotation.annotations[column]);
1650 String gene = geneAnn == null ? null : geneAnn.description;
1655 * Appends a !Domain header on change of Domain (or Domain/Gene) annotation.
1656 * If it changes to a null value, appends a !Domain Property=domainend;
1660 * buffer to append to
1662 * of alignment being output
1663 * @param geneAnnotation
1664 * "MEGA Gene" annotations
1665 * @param domainAnnotation
1666 * "MEGA Domain" annotations
1668 protected void printGeneOrDomainHeader(StringBuilder sb, int column,
1669 AlignmentAnnotation geneAnnotation,
1670 AlignmentAnnotation domainAnnotation)
1672 String gene = getGeneFromAnnotation(column, geneAnnotation);
1673 String domain = getDomainFromAnnotation(column, domainAnnotation);
1674 String property = getPropertyFromAnnotation(column, domainAnnotation);
1676 if ((gene == null && currentGene != null)
1677 || (gene == null && domain == null && currentDomain != null))
1680 * end of Gene or Domain annotation
1682 sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
1683 .append(currentDomain).append(SPACE).append(PROPERTY)
1684 .append(EQUALS).append(DOMAINEND).append(SEMICOLON);
1686 else if (gene != null && !gene.equals(currentGene))
1689 * start of a new Gene; output as "!Domain Gene=..." if we have domain
1694 sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
1695 .append(domain).append(SPACE).append(GENE).append(EQUALS)
1697 if (property != null)
1699 sb.append(SPACE).append(PROPERTY).append(EQUALS).append(property);
1701 sb.append(SEMICOLON);
1705 sb.append(newline).append(BANG).append(GENE).append(EQUALS)
1706 .append(gene).append(SEMICOLON);
1709 else if (domain != null && !domain.equals(currentGene))
1712 * change of domain within same (or no) gene
1714 sb.append(newline).append(BANG).append(DOMAIN).append(EQUALS)
1716 if (currentGene != null)
1718 sb.append(SPACE).append(GENE).append(EQUALS).append(currentGene);
1720 if (property != null)
1722 sb.append(SPACE).append(PROPERTY).append(EQUALS).append(property);
1724 sb.append(SEMICOLON);
1727 currentDomain = domain;
1731 * Outputs to string the MEGA header and any other known and relevant
1732 * alignment properties
1736 protected String printHeaders(AlignmentI al)
1738 StringBuilder sb = new StringBuilder(128);
1739 sb.append(MEGA_ID).append(newline);
1740 String propertyValue = (String) al.getProperty(PROP_TITLE);
1741 if (propertyValue != null)
1743 sb.append(BANG).append(TITLE).append(SPACE).append(propertyValue)
1744 .append(SEMICOLON).append(newline);
1746 propertyValue = (String) al.getProperty(PROP_DESCRIPTION);
1747 if (propertyValue != null)
1749 sb.append(BANG).append(DESCRIPTION).append(newline)
1750 .append(propertyValue).append(SEMICOLON)
1755 * !Format DataType CodeTable
1757 sb.append(BANG).append(FORMAT).append(newline);
1758 String dataType = (String) al.getProperty(PROP_DATATYPE);
1759 if (dataType == null)
1761 dataType = al.isNucleotide() ? NUCLEOTIDE : PROTEIN;
1763 sb.append(INDENT).append(DATATYPE).append(EQUALS).append(dataType);
1764 String codeTable = (String) al.getProperty(PROP_CODETABLE);
1765 sb.append(SPACE).append(CODETABLE).append(EQUALS)
1766 .append(codeTable == null ? "Standard" : codeTable)
1770 * !Format NSeqs NSites (the length of sequences - they should all be the
1771 * same - including gaps)
1773 sb.append(INDENT).append(N_SEQS).append(EQUALS).append(al.getHeight());
1774 sb.append(SPACE).append(N_SITES).append(EQUALS)
1775 .append(String.valueOf(al.getWidth()));
1779 * !Format Indel Identical Missing
1782 sb.append(INDEL).append(EQUALS).append(al.getGapCharacter());
1783 String identity = (String) al.getProperty(PROP_IDENTITY);
1784 if (identity != null)
1786 sb.append(SPACE).append(IDENTICAL).append(EQUALS).append(identity);
1788 String missing = (String) al.getProperty(PROP_MISSING);
1789 if (missing != null)
1791 sb.append(SPACE).append(MISSING).append(EQUALS).append(missing);
1793 sb.append(SEMICOLON).append(newline);
1795 return sb.toString();
1799 * Get the longest sequence id (to allow aligned printout).
1804 protected static int getMaxIdLength(SequenceI[] s)
1806 // TODO pull up for reuse
1808 for (SequenceI seq : s)
1810 int len = seq.getName().length();
1811 if (len > maxLength)
1820 * Get the longest sequence length (including gaps)
1825 protected static int getMaxSequenceLength(SequenceI[] s)
1827 // TODO pull up for reuse
1829 for (SequenceI seq : s)
1831 int len = seq.getLength();
1832 if (len > maxLength)
1841 * Print to string in noninterleaved format - all of each sequence in turn, in
1842 * blocks of 50 characters.
1847 protected String printNonInterleaved(SequenceI[] s)
1849 int maxSequenceLength = getMaxSequenceLength(s);
1851 int numLines = maxSequenceLength / positionsPerLine + 2 + s.length;
1854 * Roughly size a buffer to hold the whole output
1856 StringBuilder sb = new StringBuilder(numLines * positionsPerLine);
1858 for (SequenceI seq : s)
1860 printSequence(sb, seq);
1863 return new String(sb);
1867 * Append a formatted complete sequence to the string buffer
1872 protected void printSequence(StringBuilder sb, SequenceI seq)
1874 int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
1875 // round down to output a whole number of spaced blocks
1876 int chunksPerLine = positionsPerLine / spaceEvery;
1879 sb.append(HASHSIGN + seq.getName()).append(newline);
1881 while (startPos < seq.getLength())
1884 * print next line for this sequence
1886 boolean firstChunk = true;
1887 int lastPos = startPos + positionsPerLine; // exclusive
1888 for (int j = 0; j < chunksPerLine; j++)
1890 char[] subSequence = seq.getSequence(startPos,
1891 Math.min(lastPos, startPos + spaceEvery));
1892 if (subSequence.length > 0)
1898 sb.append(subSequence);
1901 startPos += subSequence.length;
1903 // line end position (base 1) as a comment
1904 sb.append(SPACE).append(COMMENT_START).append(startPos)
1905 .append(COMMENT_END);
1911 * Returns a formatted string like <br>
1912 * !Label aa_b_ ab_b_ <br>
1913 * where underscore represents no annotation, any other character a MEGA label
1915 * Returns an empty string if there is no non-null annotation in the given
1919 * start column of the alignment (base 0)
1921 * number of positions to output
1923 * padded width of !Label statement to output
1926 protected String printLabel(int fromPos, int positions, int labelWidth)
1928 int spaceEvery = this.nucleotide != null && this.nucleotide ? 3 : 10;
1930 AlignmentAnnotation ann = findAnnotation(MEGA_ANNOTATION_LABEL);
1936 StringBuilder sb = new StringBuilder(positions + 20);
1937 sb.append(String.format("%-" + labelWidth + "s ", BANG + LABEL));
1938 Annotation[] anns = annotations.get(0).annotations;
1939 int blockCharCount = 0;
1940 boolean annotationFound = false;
1942 for (int i = fromPos; i < fromPos + positions; i++)
1944 String label = String.valueOf(UNDERSCORE);
1945 if (i < anns.length && anns[i] != null)
1947 label = anns[i].displayCharacter;
1950 if (label.charAt(0) != UNDERSCORE)
1952 annotationFound = true;
1954 // add a space after each block except the last
1955 if (++blockCharCount % spaceEvery == 0
1956 && (i < fromPos + positions - 1))
1961 sb.append(SEMICOLON).append(newline);
1963 return annotationFound ? sb.toString() : none;
1967 * Returns the first stored annotation found with the given label, or null
1969 * @param annotationLabel
1972 protected AlignmentAnnotation findAnnotation(String annotationLabel)
1974 if (annotations != null)
1976 for (AlignmentAnnotation ann : annotations)
1978 if (annotationLabel.equals(ann.label))
1988 * Flag this file as interleaved or not, based on data format. Throws an
1989 * exception if has previously been determined to be otherwise.
1993 * @throws FileFormatException
1995 protected void assertInterleaved(boolean isIt, String dataLine)
1996 throws FileFormatException
1998 if (this.interleaved != null && isIt != this.interleaved.booleanValue())
2000 throw new FileFormatException("Parse error: interleaved was " + !isIt
2001 + " but now seems to be " + isIt + ", at line: " + dataLine);
2003 this.interleaved = new Boolean(isIt);
2004 setAlignmentProperty(PROP_INTERLEAVED, interleaved.toString());
2007 public boolean isInterleaved()
2009 return this.interleaved == null ? false : this.interleaved
2014 * Adds saved parsed values either as alignment properties, or (in some cases)
2015 * as specific member fields of the alignment
2018 public void addProperties(AlignmentI al)
2020 super.addProperties(al);
2023 * record gap character specified, but convert to '-' if not one we support
2025 al.setGapCharacter(Comparison.isGap(gapCharacter) ? gapCharacter
2029 * warn if e.g. DataType=DNA but data is protein (or vice versa)
2031 if (this.nucleotide != null && this.nucleotide != al.isNucleotide()) {
2032 System.err.println("Warning: " + this.title + " declared "
2033 + (nucleotide ? "" : " not ") + "nucleotide but it is"
2034 + (nucleotide ? " not" : ""));
2039 * Print the given alignment in MEGA format. If the alignment was created by
2040 * parsing a MEGA file, it should have properties set (e.g. Title) which can
2041 * surface in the output.
2044 public String print(AlignmentI al)
2046 this.nucleotide = al.isNucleotide();
2049 * "MEGA Gene", "MEGA Domain" or "MEGA Label" annotations can be output
2051 AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
2054 this.annotations = new Vector<AlignmentAnnotation>();
2055 for (AlignmentAnnotation ann : anns)
2057 annotations.add(ann);
2061 String lineLength = (String) al.getProperty(PROP_LINELENGTH);
2062 this.positionsPerLine = lineLength == null ? DEFAULT_LINE_LENGTH : Integer
2063 .parseInt(lineLength);
2066 * round down to a multiple of 3 positions per line for nucleotide
2070 positionsPerLine = positionsPerLine - (positionsPerLine % 3);
2073 String interleave = (String) al.getProperty(PROP_INTERLEAVED);
2074 if (interleave != null)
2076 this.interleaved = Boolean.valueOf(interleave);
2079 String headers = printHeaders(al);
2080 return headers + print(al.getSequencesArray());
2084 * Returns the number of sequence positions output per line
2088 public int getPositionsPerLine()
2090 return positionsPerLine;
2094 * Sets the number of sequence positions output per line. Note these will be
2095 * formatted in blocks of 3 (nucleotide) or 10 (peptide).
2099 public void setPositionsPerLine(int p)
2101 this.positionsPerLine = p;
2105 * Extracts the Coding / Noncoding property of a domain from MEGA Domain
2106 * annotation at the given column position, if present
2109 * @param domainAnnotation
2112 protected static String getPropertyFromAnnotation(int column,
2113 AlignmentAnnotation domainAnnotation)
2116 * currently depends on parsing "Exon1 (Aspx Coding)" or similar
2118 String property = null;
2119 Annotation domainAnn = domainAnnotation == null ? null
2120 : (column >= domainAnnotation.annotations.length ? null
2121 : domainAnnotation.annotations[column]);
2122 String domain = domainAnn == null ? null : domainAnn.description;
2123 if (domain != null && domain.indexOf("(") > 0)
2125 domain = domain.substring(domain.indexOf("(") +1);
2126 if (domain.indexOf(SPACE) > -1 && domain.endsWith(")")) {
2127 property = domain.substring(domain.indexOf(SPACE) + 1,
2128 domain.length() - 1);