From: BobHanson Date: Tue, 2 Jun 2020 17:28:18 +0000 (-0500) Subject: JAL-3446 JAL-3253 JAL-3445 BSML bbb XML format reader (preliminary) X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=80202c8eaaa6b288b1930c5a332d32d63b262059;p=jalview.git JAL-3446 JAL-3253 JAL-3445 BSML bbb XML format reader (preliminary) --- diff --git a/src/jalview/io/BSMLFile.java b/src/jalview/io/BSMLFile.java new file mode 100644 index 0000000..69e29f8 --- /dev/null +++ b/src/jalview/io/BSMLFile.java @@ -0,0 +1,225 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.io; + +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.MessageManager; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.Hashtable; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax; +import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed; +import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied; + +/** + * Preliminary reader for Bioinformatics Sequence Markup Language + * http://www.bsml.org + * + * @author hansonr + * + */ +public class BSMLFile extends AlignFile +{ + + public BSMLFile() + { + super(); + + } + + public BSMLFile(String inFile, DataSourceType type) throws IOException + { + super(inFile, type); + + } + + public BSMLFile(FileParse source) throws IOException + { + super(source); + + } + + public BufferedReader CreateReader() throws FileNotFoundException + { + FileReader fr = null; + fr = new FileReader(inFile); + + BufferedReader r = new BufferedReader(fr); + return r; + } + + /* + * (non-Javadoc) + * + * @see jalview.io.AlignFile#parse() + */ + @Override + public void parse() throws IOException + { + try + { + _parse(); + } catch (ExceptionPermissionDenied pdx) + { + errormessage = MessageManager.formatMessage( + "exception.BSML_couldnt_access_datasource", new String[] + { pdx.getMessage() }); + throw new IOException(pdx); + } catch (ExceptionLoadingFailed lf) + { + errormessage = MessageManager.formatMessage( + "exception.BSML_couldnt_process_data", new String[] + { lf.getMessage() }); + 
throw new IOException(lf); + } catch (ExceptionFileFormatOrSyntax iff) + { + errormessage = MessageManager + .formatMessage("exception.BSML_invalid_file", new String[] + { iff.getMessage() }); + throw new IOException(iff); + } catch (Exception x) + { + error = true; + errormessage = MessageManager.formatMessage( + "exception.BSML_problem_parsing_data", new String[] + { x.getMessage() }); + throw new IOException(errormessage, x); + } + } + + @SuppressWarnings("unchecked") + public void _parse() + throws ExceptionPermissionDenied, ExceptionLoadingFailed, + ExceptionFileFormatOrSyntax, ParserConfigurationException, + SAXException, IOException + { + + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + + dbf.setIgnoringElementContentWhitespace(true); + dbf.setIgnoringComments(true); + dbf.setValidating(true); + dbf.setCoalescing(true); + dbf.setNamespaceAware(true); + dbf.setFeature("http://xml.org/sax/features/namespaces", false); + dbf.setFeature("http://xml.org/sax/features/validation", false); + dbf.setFeature( + "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", + false); + dbf.setFeature( + "http://apache.org/xml/features/nonvalidating/load-external-dtd", + false); + + DocumentBuilder db = dbf.newDocumentBuilder(); + + Map htSeq = new Hashtable<>(); + InputSource is = new InputSource(getReader()); + Document d = db.parse(is); + NodeList sequences = d.getElementsByTagName("Sequence-data"); + int n = sequences.getLength(); + SequenceI[] sqs = new SequenceI[n]; + for (int i = 0; i < n; i++) + { + Element e = (Element) sequences.item(i); + String s = e.getTextContent(); + String id = e.getAttribute("seq-name"); + SequenceI seq = sqs[i] = new Sequence(id, s, 1, s.length()); + htSeq.put(id, seq); + // ?? 
sqs[i].setEnd(sqs[i].findPosition(sqs[i].getLength())); + } + + sequences = d.getElementsByTagName("Sequence"); + n = sequences.getLength(); + for (int i = 0; i < n; i++) + { + Element e = (Element) sequences.item(i); + String mol = e.getAttribute("molecule"); // dna or rna + if (!"dna".equals(mol)) + { + System.err.println("BSML molecule=rna not implemented"); + continue; + } + String title = e.getAttribute("title"); + SequenceI seq = htSeq.get(title); + if (seq == null) + { + continue; + } + NodeList features = e.getElementsByTagName("Feature"); + int featureCount = features.getLength(); + for (int f = 0; f < featureCount; f++) + { + Element feature = (Element) features.item(f); + // + // + // + // + Element iloc = (Element) feature + .getElementsByTagName("Interval-loc").item(0); + String complement = iloc.getAttribute("complement"); + if (!"0".equals(complement)) + { + // Jalview cannot handle complement genes (running backward on the + // complementary strand); + continue; + } + String fclass = feature.getAttribute("class"); + if (!"GENE".equals(fclass)) + { + // just processing GENE features for now; + continue; + } + String ftitle = feature.getAttribute("title"); + int start = Integer.parseInt(iloc.getAttribute("startpos")); + int end = Integer.parseInt(iloc.getAttribute("endpos")); + SequenceFeature sf = new SequenceFeature("GENE", ftitle, start, end, + null); + seq.addSequenceFeature(sf); + } + setSeqs(sqs); + } + + } + + @Override + public String print(SequenceI[] s, boolean jvSuffix) + { + return "not yet implemented"; + } + +} diff --git a/src/jalview/io/FileFormat.java b/src/jalview/io/FileFormat.java index cb61740..df76c61 100644 --- a/src/jalview/io/FileFormat.java +++ b/src/jalview/io/FileFormat.java @@ -29,6 +29,21 @@ import java.io.IOException; public enum FileFormat implements FileFormatI { + BSML("BSML", "bbb", true, false) + { + @Override + public AlignmentFileReaderI getReader(FileParse source) + throws IOException + { + return new 
BSMLFile(source); + } + + @Override + public AlignmentFileWriterI getWriter(AlignmentI al) + { + return null; + } + }, Fasta("Fasta", "fa, fasta, mfa, fastq", true, true) { @Override diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index b312474..384797e 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -165,8 +165,8 @@ public class IdentifyFile if (source.inFile != null) { String fileStr = source.inFile.getName(); - if (fileStr.contains(".jar") - || fileStr.contains(".zip") || fileStr.contains(".jvp")) + if (fileStr.contains(".jar") || fileStr.contains(".zip") + || fileStr.contains(".jvp")) { // possibly a Jalview archive (but check further) reply = FileFormat.Jalview; @@ -319,6 +319,11 @@ public class IdentifyFile if ((lessThan > -1)) // possible Markup Language data i.e HTML, // RNAML, XML { + if (data.startsWith("")) + { + reply = FileFormat.BSML; + break; + } String upper = data.toUpperCase(); if (upper.substring(lessThan).startsWith("