From 063142d5fe1679310fd6509ad78ef94c5b345f11 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 22 Aug 2016 15:38:54 +0100 Subject: [PATCH] JAL-2175 code/test fixes/tidy for PFAM/MSF/JSON roundtrip --- src/jalview/io/JSONFile.java | 4 + src/jalview/io/MSFfile.java | 80 +++++++++--------- src/jalview/io/PfamFile.java | 2 +- test/jalview/io/FormatAdapterTest.java | 138 ++++++++++++++++++++++++++++++++ test/jalview/io/StockholmFileTest.java | 72 ++++++++--------- 5 files changed, 221 insertions(+), 75 deletions(-) create mode 100644 test/jalview/io/FormatAdapterTest.java diff --git a/src/jalview/io/JSONFile.java b/src/jalview/io/JSONFile.java index 3cda444..653c071 100644 --- a/src/jalview/io/JSONFile.java +++ b/src/jalview/io/JSONFile.java @@ -735,6 +735,10 @@ public class JSONFile extends AlignFile implements ComplexAlignFile @Override public void configureForView(AlignmentViewPanel avpanel) { + if (avpanel == null) + { + return; + } super.configureForView(avpanel); AlignViewportI viewport = avpanel.getAlignViewport(); AlignmentI alignment = viewport.getAlignment(); diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index f62ad81..ab510d5 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -22,12 +22,14 @@ package jalview.io; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import jalview.util.Comparison; import jalview.util.Format; import java.io.IOException; +import java.util.ArrayList; import java.util.Hashtable; +import java.util.List; import java.util.StringTokenizer; -import java.util.Vector; /** * DOCUMENT ME! @@ -67,24 +69,23 @@ public class MSFfile extends AlignFile } /** - * DOCUMENT ME! + * Read and parse MSF sequence data */ @Override public void parse() throws IOException { - int i = 0; boolean seqFlag = false; - String key = new String(); - Vector headers = new Vector(); - Hashtable seqhash = new Hashtable(); - String line; + List headers = new ArrayList(); + Hashtable seqhash = new Hashtable(); try { + String line; while ((line = nextLine()) != null) { StringTokenizer str = new StringTokenizer(line); + String key = null; while (str.hasMoreTokens()) { String inStr = str.nextToken(); @@ -93,31 +94,31 @@ public class MSFfile extends AlignFile if (inStr.indexOf("Name:") != -1) { key = str.nextToken(); - headers.addElement(key); + headers.add(key); } - // if line has // set SeqFlag to 1 so we know sequences are coming + // if line has // set SeqFlag so we know sequences are coming if (inStr.indexOf("//") != -1) { seqFlag = true; } // Process lines as sequence lines if seqFlag is set - if ((inStr.indexOf("//") == -1) && (seqFlag == true)) + if ((inStr.indexOf("//") == -1) && seqFlag) { - // seqeunce id is the first field + // sequence id is the first field key = inStr; - StringBuffer tempseq; + StringBuilder tempseq; // Get sequence from hash if it exists if (seqhash.containsKey(key)) { - tempseq = (StringBuffer) seqhash.get(key); + tempseq = seqhash.get(key); } else { - tempseq = new StringBuffer(); + tempseq = new StringBuilder(64); seqhash.put(key, tempseq); } @@ -125,7 +126,8 @@ public class MSFfile extends AlignFile while (str.hasMoreTokens()) { // append the word to the sequence - tempseq.append(str.nextToken()); + String sequenceBlock = str.nextToken(); + tempseq.append(sequenceBlock); } } } @@ -139,11 +141,11 @@ public class MSFfile extends AlignFile this.noSeqs = headers.size(); // Add sequences to the hash - for (i = 0; i < headers.size(); i++) + for (int i = 0; i < headers.size(); i++) { - if (seqhash.get(headers.elementAt(i)) != null) + if (seqhash.get(headers.get(i)) != null) { - String head = headers.elementAt(i).toString(); + String head = headers.get(i); String seq = seqhash.get(head).toString(); if (maxLength < head.length()) @@ -151,8 +153,11 @@ public class MSFfile extends AlignFile maxLength = head.length(); } - // Replace ~ with a sensible gap character - seq = seq.replace('~', '-'); + /* + * replace ~ (leading/trailing positions) with the gap character; + * use '.' as this is the internal gap character required by MSF + */ + seq = seq.replace('~', '.'); Sequence newSeq = parseId(head); @@ -163,7 +168,7 @@ public class MSFfile extends AlignFile else { System.err.println("MSFFile Parser: Can't find sequence for " - + headers.elementAt(i)); + + headers.get(i)); } } } @@ -211,15 +216,16 @@ public class MSFfile extends AlignFile * * @return DOCUMENT ME! */ - public String print(SequenceI[] seqs) + public String print(SequenceI[] sqs) { - boolean is_NA = jalview.util.Comparison.isNucleotide(seqs); + boolean is_NA = Comparison.isNucleotide(sqs); - SequenceI[] s = new SequenceI[seqs.length]; + SequenceI[] s = new SequenceI[sqs.length]; - StringBuffer out = new StringBuffer("!!" + (is_NA ? "NA" : "AA") - + "_MULTIPLE_ALIGNMENT 1.0"); + StringBuilder out = new StringBuilder(256); + out.append("!!").append(is_NA ? "NA" : "AA") + .append("_MULTIPLE_ALIGNMENT 1.0"); // TODO: JBPNote : Jalview doesn't remember NA or AA yet. out.append(newline); out.append(newline); @@ -227,14 +233,16 @@ public class MSFfile extends AlignFile int maxid = 0; int i = 0; - while ((i < seqs.length) && (seqs[i] != null)) + while ((i < sqs.length) && (sqs[i] != null)) { - // Replace all internal gaps with . and external spaces with ~ - s[i] = new Sequence(seqs[i].getName(), seqs[i].getSequenceAsString() - .replace('-', '.'), seqs[i].getStart(), seqs[i].getEnd()); + /* + * modify to MSF format: uses '.' for internal gaps, + * and '~' for leading or trailing gaps + */ + String seqString = sqs[i].getSequenceAsString() + .replace('-', '.'); - StringBuffer sb = new StringBuffer(); - sb.append(s[i].getSequence()); + StringBuilder sb = new StringBuilder(seqString); for (int ii = 0; ii < sb.length(); ii++) { @@ -259,12 +267,12 @@ public class MSFfile extends AlignFile break; } } + s[i] = new Sequence(sqs[i].getName(), sb.toString(), + sqs[i].getStart(), sqs[i].getEnd()); - s[i].setSequence(sb.toString()); - - if (s[i].getSequence().length > max) + if (sb.length() > max) { - max = s[i].getSequence().length; + max = sb.length(); } i++; diff --git a/src/jalview/io/PfamFile.java b/src/jalview/io/PfamFile.java index 71cc7f0..667da9f 100755 --- a/src/jalview/io/PfamFile.java +++ b/src/jalview/io/PfamFile.java @@ -108,7 +108,7 @@ public class PfamFile extends AlignFile } if (spces + 1 < line.length()) { - tempseq.append(line.substring(spces + 1)); + tempseq.append(line.substring(spces + 1).trim()); } } diff --git a/test/jalview/io/FormatAdapterTest.java b/test/jalview/io/FormatAdapterTest.java new file mode 100644 index 0000000..81e336e --- /dev/null +++ b/test/jalview/io/FormatAdapterTest.java @@ -0,0 +1,138 @@ +package jalview.io; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.fail; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class FormatAdapterTest +{ + + /** + * Test saving and re-reading in a specified format + * + * @throws IOException + */ + @Test(groups = { "Functional" }, dataProvider = "formats") + public void testRoundTrip(String format) throws IOException + { + try + { + AlignmentI al = new FormatAdapter().readFile("examples/uniref50.fa", + FormatAdapter.FILE, "FASTA"); + + /* + * 'gap' is the gap character used in the alignment data file here, + * not the user preferred gap character + */ + char gap = al.getGapCharacter(); + assertNotNull(al); + + SequenceI[] seqs = al.getSequencesArray(); + String formatted = new FormatAdapter().formatSequences(format, al, + false); + + AlignmentI reloaded = new FormatAdapter().readFile(formatted, + FormatAdapter.PASTE, format); + List reread = reloaded.getSequences(); + assertEquals("Wrong number of reloaded sequences", seqs.length, + reread.size()); + + int i = 0; + for (SequenceI seq : reread) + { + String sequenceString = seq.getSequenceAsString(); + + /* + * special case: MSF always uses '.' as gap character + */ + sequenceString = adjustForGapTreatment(sequenceString, gap, format); + assertEquals( + String.format("Sequence %d: %s", i, + seqs[i].getName()), seqs[i].getSequenceAsString(), + sequenceString); + i++; + } + } catch (IOException e) + { + fail(String + .format("Format %s failed with %s", format, e.getMessage())); + } + } + + /** + * Optionally change the gap character in the string to the given character, + * depending on the sequence file format + * + * @param sequenceString + * a sequence (as written in 'format' format) + * @param gap + * the sequence's original gap character + * @param format + * @return + */ + String adjustForGapTreatment(String sequenceString, char gap, + String format) + { + if ("MSF".equals(format)) + { + /* + * MSF forces gap character to '.', so change it back + * for comparison purposes + */ + sequenceString = sequenceString.replace('.', gap); + } + return sequenceString; + } + + /** + * Data provider that serves alignment formats that are both readable and + * writable + * + * @return + */ + @DataProvider(name = "formats") + static Object[][] getFormats() + { + List both = new ArrayList(); + String[] readable = FormatAdapter.READABLE_FORMATS; + List writeable = Arrays.asList(FormatAdapter.WRITEABLE_FORMATS); + for (String r : readable) + { + if (writeable.contains(r)) + { + both.add(r); + } + } + + Object[][] formats = new Object[both.size()][]; + int i = 0; + for (String format : both) + { + formats[i] = new Object[] { format }; + i++; + } + return formats; + } + + /** + * Enable this to isolate testing to a single file format + * + * @throws IOException + */ + @Test(groups = { "Functional" }, enabled = false) + public void testOneFormatRoundTrip() throws IOException + { + testRoundTrip("JSON"); + } +} diff --git a/test/jalview/io/StockholmFileTest.java b/test/jalview/io/StockholmFileTest.java index d7a9166..b034fb0 100644 --- a/test/jalview/io/StockholmFileTest.java +++ b/test/jalview/io/StockholmFileTest.java @@ -23,6 +23,7 @@ package jalview.io; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertTrue; +import static org.testng.AssertJUnit.fail; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -159,7 +160,7 @@ public class StockholmFileTest * 'secondary' or generated alignment from some datapreserving * transformation * @param ignoreFeatures - * when true, differences in seuqence feature annotation are ignored. + * when true, differences in sequence feature annotation are ignored */ public static void testAlignmentEquivalence(AlignmentI al, AlignmentI al_input, boolean ignoreFeatures) @@ -167,12 +168,9 @@ public class StockholmFileTest assertNotNull("Original alignment was null", al); assertNotNull("Generated alignment was null", al_input); - assertTrue( - "Alignment dimension mismatch: originl contains " - + al.getHeight() + " and generated has " - + al_input.getHeight() + " sequences; original has " - + al.getWidth() + " and generated has " - + al_input.getWidth() + " columns.", + assertTrue("Alignment dimension mismatch: original: " + al.getHeight() + + "x" + al.getWidth() + ", generated: " + al_input.getHeight() + + "x" + al_input.getWidth(), al.getHeight() == al_input.getHeight() && al.getWidth() == al_input.getWidth()); @@ -183,9 +181,10 @@ public class StockholmFileTest // note - at moment we do not distinguish between alignment without any // annotation rows and alignment with no annotation row vector // we might want to revise this in future - int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0 - : aa_original.length); - Map orig_groups = new HashMap(), new_groups = new HashMap(); + int aa_new_size = (aa_new == null ? 0 : aa_new.length); + int aa_original_size = (aa_original == null ? 0 : aa_original.length); + Map orig_groups = new HashMap(); + Map new_groups = new HashMap(); if (aa_new != null && aa_original != null) { @@ -196,20 +195,17 @@ public class StockholmFileTest assertTrue("Different alignment annotation at position " + i, equalss(aa_original[i], aa_new[i])); // compare graphGroup or graph properties - needed to verify JAL-1299 - assertTrue("Graph type not identical.", - aa_original[i].graph == aa_new[i].graph); - assertTrue("Visibility not identical.", - aa_original[i].visible == aa_new[i].visible); - assertTrue( - "Threshold line not identical.", - aa_original[i].threshold == null ? aa_new[i].threshold == null - : aa_original[i].threshold - .equals(aa_new[i].threshold)); + assertEquals("Graph type not identical.", aa_original[i].graph, + aa_new[i].graph); + assertEquals("Visibility not identical.", aa_original[i].visible, + aa_new[i].visible); + assertEquals("Threshold line not identical.", + aa_original[i].threshold, aa_new[i].threshold); // graphGroup may differ, but pattern should be the same - Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer( - aa_new[i].graphGroup + 2); - BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups - .get(n_ggrp); + Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2); + Integer n_ggrp = new Integer(aa_new[i].graphGroup + 2); + BitSet orig_g = orig_groups.get(o_ggrp); + BitSet new_g = new_groups.get(n_ggrp); if (orig_g == null) { orig_groups.put(o_ggrp, orig_g = new BitSet()); @@ -218,8 +214,8 @@ public class StockholmFileTest { new_groups.put(n_ggrp, new_g = new BitSet()); } - assertTrue("Graph Group pattern differs at annotation " + i, - orig_g.equals(new_g)); + assertEquals("Graph Group pattern differs at annotation " + i, + orig_g, new_g); orig_g.set(i); new_g.set(i); } @@ -230,10 +226,9 @@ public class StockholmFileTest } } } - assertTrue( - "Generated and imported alignment have different annotation sets (" - + aa_new_size + " != " + aa_original_size + ")", - aa_new_size == aa_original_size); + assertEquals( + "Generated and imported alignment have different annotation sets", + aa_new_size, aa_original_size); // check sequences, annotation and features SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length]; @@ -260,8 +255,8 @@ public class StockholmFileTest { String ss_original = seq_original[i].getSequenceAsString(); String ss_new = seq_new[in].getSequenceAsString(); - assertTrue("The sequences " + name + "/" + start + "-" + end - + " are not equal", ss_original.equals(ss_new)); + assertEquals("The sequences " + name + "/" + start + "-" + end + + " are not equal", ss_original, ss_new); assertTrue( "Sequence Features were not equivalent" @@ -284,15 +279,16 @@ public class StockholmFileTest .getSequenceFeatures().length]; sequenceFeatures_new = seq_new[in].getSequenceFeatures(); - assertTrue("different number of features", seq_original[i] - .getSequenceFeatures().length == seq_new[in] + assertEquals("different number of features", + seq_original[i].getSequenceFeatures().length, + seq_new[in] .getSequenceFeatures().length); for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++) { - assertTrue("Different features", - sequenceFeatures_original[feat] - .equals(sequenceFeatures_new[feat])); + assertEquals("Different features", + sequenceFeatures_original[feat], + sequenceFeatures_new[feat]); } } // compare alignment annotation @@ -319,9 +315,9 @@ public class StockholmFileTest else if (al.getSequenceAt(i).getAnnotation() != null && al_input.getSequenceAt(in).getAnnotation() == null) { - assertTrue("Annotations differed between sequences (" + fail("Annotations differed between sequences (" + al.getSequenceAt(i).getName() + ") and (" - + al_input.getSequenceAt(i).getName() + ")", false); + + al_input.getSequenceAt(i).getName() + ")"); } break; } -- 1.7.10.2