JAL-3445 branch for BSML file reading development Jalview-JS/JAL-3253-applet-JAL-3445_support_for_BSML
author hansonr <hansonr@STO24954W.ad.stolaf.edu>
Mon, 14 Oct 2019 14:40:05 +0000 (10:40 -0400)
committer hansonr <hansonr@STO24954W.ad.stolaf.edu>
Mon, 14 Oct 2019 14:40:05 +0000 (10:40 -0400)
src/jalview/io/BSMLFile.java [new file with mode: 0644]
src/jalview/io/FileFormat.java
src/jalview/io/IdentifyFile.java
test/jalview/io/JSONFileTest.java

diff --git a/src/jalview/io/BSMLFile.java b/src/jalview/io/BSMLFile.java
new file mode 100644 (file)
index 0000000..69e29f8
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MessageManager;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
+import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
+import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
+
+/**
+ * Preliminary reader for Bioinformatics Sequence Markup Language
+ * http://www.bsml.org
+ * 
+ * @author hansonr
+ *
+ */
+public class BSMLFile extends AlignFile
+{
+
+  public BSMLFile()
+  {
+    super();
+
+  }
+
+  public BSMLFile(String inFile, DataSourceType type) throws IOException
+  {
+    super(inFile, type);
+
+  }
+
+  public BSMLFile(FileParse source) throws IOException
+  {
+    super(source);
+
+  }
+
+  public BufferedReader CreateReader() throws FileNotFoundException
+  {
+    FileReader fr = null;
+    fr = new FileReader(inFile);
+
+    BufferedReader r = new BufferedReader(fr);
+    return r;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.io.AlignFile#parse()
+   */
+  @Override
+  public void parse() throws IOException
+  {
+    try
+    {
+      _parse();
+    } catch (ExceptionPermissionDenied pdx)
+    {
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_couldnt_access_datasource", new String[]
+              { pdx.getMessage() });
+      throw new IOException(pdx);
+    } catch (ExceptionLoadingFailed lf)
+    {
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_couldnt_process_data", new String[]
+              { lf.getMessage() });
+      throw new IOException(lf);
+    } catch (ExceptionFileFormatOrSyntax iff)
+    {
+      errormessage = MessageManager
+              .formatMessage("exception.BSML_invalid_file", new String[]
+              { iff.getMessage() });
+      throw new IOException(iff);
+    } catch (Exception x)
+    {
+      error = true;
+      errormessage = MessageManager.formatMessage(
+              "exception.BSML_problem_parsing_data", new String[]
+              { x.getMessage() });
+      throw new IOException(errormessage, x);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  public void _parse()
+          throws ExceptionPermissionDenied, ExceptionLoadingFailed,
+          ExceptionFileFormatOrSyntax, ParserConfigurationException,
+          SAXException, IOException
+  {
+
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+
+    dbf.setIgnoringElementContentWhitespace(true);
+    dbf.setIgnoringComments(true);
+    dbf.setValidating(true);
+    dbf.setCoalescing(true);
+    dbf.setNamespaceAware(true);
+      dbf.setFeature("http://xml.org/sax/features/namespaces", false);
+      dbf.setFeature("http://xml.org/sax/features/validation", false);
+      dbf.setFeature(
+              "http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
+              false);
+      dbf.setFeature(
+              "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+              false);
+
+      DocumentBuilder db = dbf.newDocumentBuilder();
+
+      Map<String, SequenceI> htSeq = new Hashtable<>();
+      InputSource is = new InputSource(getReader());
+      Document d = db.parse(is);
+      NodeList sequences = d.getElementsByTagName("Sequence-data");
+      int n = sequences.getLength();
+      SequenceI[] sqs = new SequenceI[n];
+      for (int i = 0; i < n; i++)
+      {
+        Element e = (Element) sequences.item(i);
+        String s = e.getTextContent();
+        String id = e.getAttribute("seq-name");
+        SequenceI seq = sqs[i] = new Sequence(id, s, 1, s.length());
+        htSeq.put(id, seq);
+        // ?? sqs[i].setEnd(sqs[i].findPosition(sqs[i].getLength()));
+      }
+
+      sequences = d.getElementsByTagName("Sequence");
+      n = sequences.getLength();
+      for (int i = 0; i < n; i++)
+      {
+        Element e = (Element) sequences.item(i);
+        String mol = e.getAttribute("molecule"); // dna or rna
+        if (!"dna".equals(mol))
+        {
+          System.err.println("BSML molecule=rna not implemented");
+          continue;
+        }
+        String title = e.getAttribute("title");
+        SequenceI seq = htSeq.get(title);
+        if (seq == null)
+        {
+          continue;
+        }
+      NodeList features = e.getElementsByTagName("Feature");
+        int featureCount = features.getLength();
+        for (int f = 0; f < featureCount; f++)
+        {
+          Element feature = (Element) features.item(f);
+          // <Feature class="GENE" title="CPXV-GER_1980_EP4-211">
+          // <Interval-loc complement="0" endpos="217104" startpos="216643"/>
+          // <Resource id="GENE-ID:119705"/>
+          // </Feature>
+        Element iloc = (Element) feature
+                .getElementsByTagName("Interval-loc").item(0);
+          String complement = iloc.getAttribute("complement");
+          if (!"0".equals(complement))
+          {
+            // Jalview cannot handle complement genes (running backward on the
+            // complementary strand);
+            continue;
+          }
+          String fclass = feature.getAttribute("class");
+          if (!"GENE".equals(fclass))
+          {
+            // just processing GENE features for now;
+            continue;
+          }
+          String ftitle = feature.getAttribute("title");
+        int start = Integer.parseInt(iloc.getAttribute("startpos"));
+        int end = Integer.parseInt(iloc.getAttribute("endpos"));
+        SequenceFeature sf = new SequenceFeature("GENE", ftitle, start, end,
+                null);
+        seq.addSequenceFeature(sf);
+        }
+        setSeqs(sqs);
+      }
+
+  }
+
+  @Override
+  public String print(SequenceI[] s, boolean jvSuffix)
+  {
+    return "not yet implemented";
+  }
+
+}
index cb61740..df76c61 100644 (file)
@@ -29,6 +29,21 @@ import java.io.IOException;
 
 public enum FileFormat implements FileFormatI
 {
+  BSML("BSML", "bbb", true, false)
+  {
+    @Override
+    public AlignmentFileReaderI getReader(FileParse source)
+            throws IOException
+    {
+      return new BSMLFile(source);
+    }
+
+    @Override
+    public AlignmentFileWriterI getWriter(AlignmentI al)
+    {
+      return null;
+    }
+  },
   Fasta("Fasta", "fa, fasta, mfa, fastq", true, true)
   {
     @Override
index 621cfac..fc9938f 100755 (executable)
@@ -179,6 +179,15 @@ public class IdentifyFile
             break;
           }
         }
+        if (data.startsWith("<?xml"))
+        {
+          data = source.nextLine();
+          if (data.startsWith("<Bsml>"))
+          {
+            reply = FileFormat.BSML;
+            break;
+          }
+        }
         data = data.toUpperCase();
 
         if (data.startsWith(ScoreMatrixFile.SCOREMATRIX))
index 2d5f7a2..b43f3aa 100644 (file)
@@ -35,6 +35,7 @@ import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 import jalview.datamodel.features.SequenceFeatures;
 import jalview.gui.AlignFrame;
+import jalview.gui.Desktop;
 import jalview.gui.JvOptionPane;
 import jalview.json.binding.biojson.v1.ColourSchemeMapper;
 import jalview.schemes.ColourSchemeI;
@@ -100,6 +101,8 @@ public class JSONFileTest
   @BeforeTest(alwaysRun = true)
   public void setup() throws Exception
   {
+
+    Desktop.getInstanceOnly(); // Prevents Desktop.checkURLLinks();
     /*
      * construct expected values
      * nb this have to match the data in examples/example.json