From f42cec3b999bbed0ce85045760b55701c527da32 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 18 Aug 2020 14:22:56 +0100 Subject: [PATCH] JAL-1260 file formats and IdentifyFile updates for GenBank and ENA --- src/jalview/io/FileFormat.java | 25 ++++++++++++++++++++----- src/jalview/io/FlatFile.java | 2 ++ src/jalview/io/IdentifyFile.java | 13 +++++++++++++ test/jalview/io/EmblFlatFileTest.java | 2 -- test/jalview/io/GenBankFileTest.java | 1 - test/jalview/io/IdentifyFileTest.java | 6 ++++-- 6 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/jalview/io/FileFormat.java b/src/jalview/io/FileFormat.java index 4f0480d..1d240b2 100644 --- a/src/jalview/io/FileFormat.java +++ b/src/jalview/io/FileFormat.java @@ -20,14 +20,14 @@ */ package jalview.io; +import java.io.IOException; + import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; import jalview.ext.jmol.JmolParser; import jalview.structure.StructureImportSettings; -import java.io.IOException; - public enum FileFormat implements FileFormatI { Fasta("Fasta", "fa, fasta, mfa, fastq", true, true) @@ -244,7 +244,22 @@ public enum FileFormat implements FileFormatI return new PhylipFile(); } }, - GenBank("GenBank/ENA Flatfile","gb",false,false) + GenBank("GenBank Flatfile", "gb", true, false) + { + @Override + public AlignmentFileReaderI getReader(FileParse source) + throws IOException + { + return new GenBankFile(source, "GenBank"); + } + + @Override + public AlignmentFileWriterI getWriter(AlignmentI al) + { + return null; + } + }, + Embl("ENA Flatfile", "txt", true, false) { @Override public AlignmentFileReaderI getReader(FileParse source) @@ -253,13 +268,13 @@ public enum FileFormat implements FileFormatI // Always assume we import from EMBL for now return new EmblFlatFile(source, DBRefSource.EMBL); } + @Override public AlignmentFileWriterI getWriter(AlignmentI al) { return null; } - } - , + }, Jnet("JnetFile", "", false, false) { @Override diff --git a/src/jalview/io/FlatFile.java b/src/jalview/io/FlatFile.java index 737f1d8..01ce085 100644 --- a/src/jalview/io/FlatFile.java +++ b/src/jalview/io/FlatFile.java @@ -194,6 +194,8 @@ public abstract class FlatFile extends AlignFile * using TreeMap gives CDS sequences in alphabetical, so readable, order */ cds = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + + parse(); } /** diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index b312474..e7ee711 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -185,6 +185,19 @@ public class IdentifyFile reply = FileFormat.ScoreMatrix; break; } + if (data.startsWith("LOCUS")) + { + reply = FileFormat.GenBank; + break; + } + if (data.startsWith("ID ")) + { + if (data.substring(2).trim().split(";").length == 7) + { + reply = FileFormat.Embl; + break; + } + } if (data.startsWith("H ") && !aaIndexHeaderRead) { aaIndexHeaderRead = true; diff --git a/test/jalview/io/EmblFlatFileTest.java b/test/jalview/io/EmblFlatFileTest.java index c893c09..ee853f3 100644 --- a/test/jalview/io/EmblFlatFileTest.java +++ b/test/jalview/io/EmblFlatFileTest.java @@ -47,7 +47,6 @@ public class EmblFlatFileTest File dataFile = new File("test/jalview/io/J03321.embl.txt"); FileParse fp = new FileParse(dataFile, DataSourceType.FILE); EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); - parser.parse(); List seqs = parser.getSeqs(); assertEquals(seqs.size(), 1); @@ -263,7 +262,6 @@ public class EmblFlatFileTest + " ggatGcgtaa gttagacgaa attttgtctt tgcgcacaga 40\n"; FileParse fp = new FileParse(data, DataSourceType.PASTE); EmblFlatFile parser = new EmblFlatFile(fp, "EmblTest"); - parser.parse(); List seqs = parser.getSeqs(); assertEquals(seqs.size(), 1); SequenceI seq = seqs.get(0); diff --git a/test/jalview/io/GenBankFileTest.java b/test/jalview/io/GenBankFileTest.java index d800b1d..25ad601 100644 --- a/test/jalview/io/GenBankFileTest.java +++ b/test/jalview/io/GenBankFileTest.java @@ -44,7 +44,6 @@ public class GenBankFileTest FileParse fp = new FileParse(dataFile.getAbsolutePath(), DataSourceType.FILE); FlatFile parser = new GenBankFile(fp, "GenBankTest"); - parser.parse(); List seqs = parser.getSeqs(); assertEquals(seqs.size(), 1); diff --git a/test/jalview/io/IdentifyFileTest.java b/test/jalview/io/IdentifyFileTest.java index cf7f58f..68c099e 100644 --- a/test/jalview/io/IdentifyFileTest.java +++ b/test/jalview/io/IdentifyFileTest.java @@ -24,13 +24,13 @@ import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import jalview.gui.JvOptionPane; - import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import jalview.gui.JvOptionPane; + public class IdentifyFileTest { @@ -102,6 +102,8 @@ public class IdentifyFileTest { "examples/testdata/test.html", FileFormat.Html }, { "examples/testdata/test.pileup", FileFormat.Pileup }, { "examples/testdata/test.blc", FileFormat.BLC }, + { "test/jalview/io/J03321.embl.txt", FileFormat.Embl }, + { "test/jalview/io/J03321.gb", FileFormat.GenBank }, { "examples/exampleFeatures.txt", FileFormat.Features }, { "examples/testdata/simpleGff3.gff", FileFormat.Features }, { "examples/testdata/test.jvp", FileFormat.Jalview }, -- 1.7.10.2