import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
import java.io.IOException;
import java.util.ArrayList;
private static final String TAB = "\t";
+ private static final char DEFAULT_GAP = '-';
+
/*
* number of sequence positions output per line
*/
private String title;
// gap character may be explicitly declared, default is -
- private char gapCharacter = '-';
+ private char gapCharacter = DEFAULT_GAP;
// identity character if declared
private char identityCharacter = 0;
@Override
public void parse() throws IOException
{
- gapCharacter = '-';
+ gapCharacter = DEFAULT_GAP;
sequenceFeatures = new HashMap<String, List<SequenceFeature>>();
geneStart = new HashMap<String, Integer>();
domainStart = new HashMap<String, Integer>();
for (int i = 0; i < formatted.length(); i++)
{
char nextChar = formatted.charAt(i);
- if (nextChar != gapCharacter)
- {
- nonGapped++;
- }
- if (nextChar == identityCharacter
- && len + i < referenceSequence.length())
+ if (nextChar == gapCharacter)
{
- sb1.append(referenceSequence.charAt(len + i));
+ sb1.append(Comparison.isGap(nextChar) ? nextChar : DEFAULT_GAP);
}
else
{
- sb1.append(nextChar);
+ nonGapped++;
+ if (nextChar == identityCharacter
+ && len + i < referenceSequence.length())
+ {
+ sb1.append(referenceSequence.charAt(len + i));
+ }
+ else
+ {
+ sb1.append(nextChar);
+ }
}
}
formatted = sb1.toString();
else if (keyword.equalsIgnoreCase(INDEL))
{
this.gapCharacter = value.charAt(0);
+ if (!Comparison.isGap(gapCharacter))
+ {
+ System.err.println("Jalview doesn't support '" + gapCharacter
+ + "' for gaps, will be converted to '" + DEFAULT_GAP + "'");
+ }
}
else if (keyword.equalsIgnoreCase(IDENTICAL)
public void addProperties(AlignmentI al)
{
super.addProperties(al);
- al.setGapCharacter(gapCharacter);
/*
+ * record gap character specified, but convert to '-' if not one we support
+ */
+ al.setGapCharacter(Comparison.isGap(gapCharacter) ? gapCharacter
+ : DEFAULT_GAP);
+
+ /*
* warn if e.g. DataType=DNA but data is protein (or vice versa)
*/
if (this.nucleotide != null && this.nucleotide != al.isNucleotide()) {
import jalview.datamodel.SequenceI;
import java.io.IOException;
+import java.util.List;
import java.util.Vector;
import org.testng.annotations.Test;
verifySequenceFeature(sfs[2], "gene2", "Gene", 7, 12);
}
}
+
+ //@formatter:on
+
+ /**
+ * Test case where the declared gap character is one Jalview does not support;
+ * it should be converted to a '-'.
+ * <p>
+ * The pasted data is interleaved (two blocks for each of two sequences) and
+ * its format line declares '%' as the indel character and '.' as the
+ * identity character. The assertions check that:
+ * <ul>
+ * <li>every '%' in the sequence data is read as '-'</li>
+ * <li>each '.' in the second sequence is replaced by the character at the
+ * same position in the first sequence</li>
+ * <li>the gap character reported by the alignment is '-'</li>
+ * </ul>
+ *
+ * @throws IOException
+ *           not expected for this input; presumably raised by readFile if the
+ *           data cannot be read - TODO confirm
+ */
+ @Test(groups = { "Functional" })
+ public void testParse_weirdGapCharacter() throws IOException
+ {
+ //@formatter:off
+ String data = "#MEGA\n"+
+ "!TITLE Interleaved sequence data;\n" +
+ "!Format Identical=. Indel=%;\n\n" +
+ "#U455 %BC%EF\n" +
+ "#CPZANT M..P.R\n\n" +
+ "#U455 KLMNOP\n" +
+ "#CPZANT .%%Z..";
+ AppletFormatAdapter fa = new AppletFormatAdapter();
+ AlignmentI al = fa.readFile(data,
+ AppletFormatAdapter.PASTE, "MEGA");
+ //@formatter:on
+ List<SequenceI> seqs = al.getSequences();
+ assertEquals("First sequence data wrong", "-BC-EFKLMNOP", seqs.get(0)
+ .getSequenceAsString());
+ assertEquals("Second sequence data wrong", "MBCPERK--ZOP", seqs.get(1)
+ .getSequenceAsString());
+ assertEquals('-', al.getGapCharacter());
+ }
}