JAL-3446 JAL-3253 JAL-3445 BSML bbb XML format reader (preliminary)
author BobHanson <hansonr@stolaf.edu>
Tue, 2 Jun 2020 17:28:18 +0000 (12:28 -0500)
committer BobHanson <hansonr@stolaf.edu>
Tue, 2 Jun 2020 17:28:18 +0000 (12:28 -0500)
src/jalview/io/BSMLFile.java [new file with mode: 0644]
src/jalview/io/FileFormat.java
src/jalview/io/IdentifyFile.java

diff --git a/src/jalview/io/BSMLFile.java b/src/jalview/io/BSMLFile.java
new file mode 100644 (file)
index 0000000..69e29f8
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MessageManager;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
+import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
+import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
+
+/**
+ * Preliminary reader for Bioinformatics Sequence Markup Language
+ * http://www.bsml.org
+ * 
+ * @author hansonr
+ *
+ */
+public class BSMLFile extends AlignFile
+{
+
+  public BSMLFile()
+  {
+    super();
+
+  }
+
+  public BSMLFile(String inFile, DataSourceType type) throws IOException
+  {
+    super(inFile, type);
+
+  }
+
+  public BSMLFile(FileParse source) throws IOException
+  {
+    super(source);
+
+  }
+
+  public BufferedReader CreateReader() throws FileNotFoundException
+  {
+    FileReader fr = null;
+    fr = new FileReader(inFile);
+
+    BufferedReader r = new BufferedReader(fr);
+    return r;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.io.AlignFile#parse()
+   */
+  @Override
+  public void parse() throws IOException
+  {
+    try
+    {
+      _parse();
+    } catch (ExceptionPermissionDenied pdx)
+    {
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_couldnt_access_datasource", new String[]
+              { pdx.getMessage() });
+      throw new IOException(pdx);
+    } catch (ExceptionLoadingFailed lf)
+    {
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_couldnt_process_data", new String[]
+              { lf.getMessage() });
+      throw new IOException(lf);
+    } catch (ExceptionFileFormatOrSyntax iff)
+    {
+      errormessage = MessageManager
+              .formatMessage("exception.BSML_invalid_file", new String[]
+              { iff.getMessage() });
+      throw new IOException(iff);
+    } catch (Exception x)
+    {
+      error = true;
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_problem_parsing_data", new String[]
+              { x.getMessage() });
+      throw new IOException(errormessage, x);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  public void _parse()
+          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
+          ExceptionFileFormatOrSyntax, ParserConfigurationException,
+          SAXException, IOException
+  {
+
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+
+    dbf.setIgnoringElementContentWhitespace(true);
+    dbf.setIgnoringComments(true);
+    dbf.setValidating(true);
+    dbf.setCoalescing(true);
+    dbf.setNamespaceAware(true);
+      dbf.setFeature("http://xml.org/sax/features/namespaces", false);
+      dbf.setFeature("http://xml.org/sax/features/validation", false);
+      dbf.setFeature(
+              "http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
+              false);
+      dbf.setFeature(
+              "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+              false);
+
+      DocumentBuilder db = dbf.newDocumentBuilder();
+
+      Map<String, SequenceI> htSeq = new Hashtable<>();
+      InputSource is = new InputSource(getReader());
+      Document d = db.parse(is);
+      NodeList sequences = d.getElementsByTagName("Sequence-data");
+      int n = sequences.getLength();
+      SequenceI[] sqs = new SequenceI[n];
+      for (int i = 0; i < n; i++)
+      {
+        Element e = (Element) sequences.item(i);
+        String s = e.getTextContent();
+        String id = e.getAttribute("seq-name");
+        SequenceI seq = sqs[i] = new Sequence(id, s, 1, s.length());
+        htSeq.put(id, seq);
+        // ?? sqs[i].setEnd(sqs[i].findPosition(sqs[i].getLength()));
+      }
+
+      sequences = d.getElementsByTagName("Sequence");
+      n = sequences.getLength();
+      for (int i = 0; i < n; i++)
+      {
+        Element e = (Element) sequences.item(i);
+        String mol = e.getAttribute("molecule"); // dna or rna
+        if (!"dna".equals(mol))
+        {
+          System.err.println("BSML molecule=rna not implemented");
+          continue;
+        }
+        String title = e.getAttribute("title");
+        SequenceI seq = htSeq.get(title);
+        if (seq == null)
+        {
+          continue;
+        }
+      NodeList features = e.getElementsByTagName("Feature");
+        int featureCount = features.getLength();
+        for (int f = 0; f < featureCount; f++)
+        {
+          Element feature = (Element) features.item(f);
+          // <Feature class="GENE" title="CPXV-GER_1980_EP4-211">
+          // <Interval-loc complement="0" endpos="217104" startpos="216643"/>
+          // <Resource id="GENE-ID:119705"/>
+          // </Feature>
+        Element iloc = (Element) feature
+                .getElementsByTagName("Interval-loc").item(0);
+          String complement = iloc.getAttribute("complement");
+          if (!"0".equals(complement))
+          {
+            // Jalview cannot handle complement genes (running backward on the
+            // complementary strand);
+            continue;
+          }
+          String fclass = feature.getAttribute("class");
+          if (!"GENE".equals(fclass))
+          {
+            // just processing GENE features for now;
+            continue;
+          }
+          String ftitle = feature.getAttribute("title");
+        int start = Integer.parseInt(iloc.getAttribute("startpos"));
+        int end = Integer.parseInt(iloc.getAttribute("endpos"));
+        SequenceFeature sf = new SequenceFeature("GENE", ftitle, start, end,
+                null);
+        seq.addSequenceFeature(sf);
+        }
+        setSeqs(sqs);
+      }
+
+  }
+
+  @Override
+  public String print(SequenceI[] s, boolean jvSuffix)
+  {
+    return "not yet implemented";
+  }
+
+}
index cb61740..df76c61 100644 (file)
@@ -29,6 +29,21 @@ import java.io.IOException;
 
 public enum FileFormat implements FileFormatI
 {
+  BSML("BSML", "bbb", true, false) // BSML XML format; "bbb" is presumably the file extension list - confirm
+  { // NOTE(review): the two flags presumably mean readable / not writable - confirm against the enum constructor
+    @Override
+    public AlignmentFileReaderI getReader(FileParse source)
+            throws IOException
+    {
+      return new BSMLFile(source); // parsing is delegated to BSMLFile
+    }
+
+    @Override
+    public AlignmentFileWriterI getWriter(AlignmentI al)
+    {
+      return null; // no BSML writer is provided
+    }
+  },
   Fasta("Fasta", "fa, fasta, mfa, fastq", true, true)
   {
     @Override
index b312474..384797e 100755 (executable)
@@ -165,8 +165,8 @@ public class IdentifyFile
           if (source.inFile != null)
           {
             String fileStr = source.inFile.getName();
-            if (fileStr.contains(".jar")
-                    || fileStr.contains(".zip") || fileStr.contains(".jvp"))
+            if (fileStr.contains(".jar") || fileStr.contains(".zip")
+                    || fileStr.contains(".jvp"))
             {
               // possibly a Jalview archive (but check further)
               reply = FileFormat.Jalview;
@@ -319,6 +319,11 @@ public class IdentifyFile
         if ((lessThan > -1)) // possible Markup Language data i.e HTML,
                              // RNAML, XML
         {
+          if (data.startsWith("<BSML>"))
+          {
+            reply = FileFormat.BSML;
+            break;
+          }
           String upper = data.toUpperCase();
           if (upper.substring(lessThan).startsWith("<HTML"))
           {