From: gmungoc Date: Sat, 21 Nov 2015 19:24:24 +0000 (+0000) Subject: JAL-653 JAL-1968 FeaturesFile now handles Jalview or GFF2 or GFF3 X-Git-Tag: Release_2_10_0~296^2~130 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=26ba864a6c290121fe6cf616794d2d0bea65fb7d;p=jalview.git JAL-653 JAL-1968 FeaturesFile now handles Jalview or GFF2 or GFF3 (partially); feature links option removed --- diff --git a/examples/exampleFeatures.txt b/examples/exampleFeatures.txt index 0bb8b7e..dfadb50 100755 --- a/examples/exampleFeatures.txt +++ b/examples/exampleFeatures.txt @@ -1,23 +1,5 @@ -#------------------------------------------------------------------------------- -# Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) -# Copyright (C) $$Year-Rel$$ The Jalview Authors -# -# This file is part of Jalview. -# -# Jalview is free software: you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -# -# Jalview is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with Jalview. If not, see . -# The Jalview Authors are detailed in the 'AUTHORS' file. 
-#------------------------------------------------------------------------------- -ST-TURN-IIL 705b23 -GAMMA-TURN-CLASSIC 788763 +ST-TURN-IIL blue|255,0,255|absolute|20.0|95.0|below|66.0 +GAMMA-TURN-CLASSIC red|0,255,255|20.0|95.0|below|66.0 BETA-TURN-IR 9a6a94 BETA-TURN-IL d6a6ca BETA-BULGE 1dc451 diff --git a/examples/testdata/exonerateoutput.gff b/examples/testdata/exonerateoutput.gff index 3ea68dc..bf3349f 100644 --- a/examples/testdata/exonerateoutput.gff +++ b/examples/testdata/exonerateoutput.gff @@ -1,3 +1,7 @@ +# (exonerate delimits GFF with [START|END] OF GFF DUMP) +# --- START OF GFF DUMP --- +# +# ##gff-version 2 ##source-version exonerate:protein2genome:local 2.2.0 ##date 2015-01-16 @@ -10,4 +14,6 @@ contig_1146 exonerate:protein2genome:local gene 8534 11269 3652 - . gene_id 0 ; contig_1146 exonerate:protein2genome:local cds 8534 11269 . - . contig_1146 exonerate:protein2genome:local exon 8534 11269 . - . insertions 3 ; deletions 6 contig_1146 exonerate:protein2genome:local similarity 8534 11269 3652 - . alignment_id 0 ; Query DDB_G0269124 ; Align 11270 143 120 ; Align 11150 187 282 ; Align 10865 281 888 ; Align 9977 578 1068 ; Align 8909 935 375 +# and a made-up alignment to a sequence in exonerateseqs.fa +contig_1146 exonerate:protein2genome:local similarity 8534 11269 3652 - . 
alignment_id 0 ; Query DDB_G0280897 ; Align 11270 143 120 # --- END OF GFF DUMP --- diff --git a/examples/testdata/simplegff3.gff b/examples/testdata/simpleGff3.gff similarity index 64% rename from examples/testdata/simplegff3.gff rename to examples/testdata/simpleGff3.gff index 2ac5421..0d85293 100644 --- a/examples/testdata/simplegff3.gff +++ b/examples/testdata/simpleGff3.gff @@ -1,15 +1,22 @@ ##gff-version 2 +# exonerate output in gff2 format; not gff3 because +# - 'similarity' is not a Sequence Ontology term +# - attributes' name/values are separated by space ' ' not equals '=' ##source-version exonerate:protein2genome:local 2.2.0 ##date 2015-01-16 ##type DNA # -# +# tab-delimited # seqname source feature start end score strand frame attributes # seq1 exonerate:protein2genome:local gene 8 11 3652 - . gene_id 0 ; sequence seq2 ; gene_orientation . seq1 exonerate:protein2genome:local cds 9 11 . - . seq1 exonerate:protein2genome:local exon 9 11 . - . insertions 3 ; deletions 6 seq1 exonerate:protein2genome:local similarity 8 11 3652 - . alignment_id 0 ; Query seq2 ; Align 11 1 3 +# +# appending FASTA sequences is strictly a GFF3 format feature +# but Jalview is able to handle this mixture of GFF2 / GFF3 :-) +# ##FASTA >seq1 ACTACGACACGACGACGACGACG diff --git a/help/html/features/featuresFormat.html b/help/html/features/featuresFormat.html index f5f854c..9f33b7b 100755 --- a/help/html/features/featuresFormat.html +++ b/help/html/features/featuresFormat.html @@ -141,7 +141,7 @@ This format allows two alternate ways of referring to a sequence, - either by its text ID, or its index in an associated alignment. + either by its text ID, or its index (base 0) in an associated alignment. Normally, sequence features are associated with sequences rather than alignments, and the sequenceIndex field is given as "-1". 
In order to specify a sequence by its index in a particular alignment, diff --git a/src/jalview/appletgui/AlignFrame.java b/src/jalview/appletgui/AlignFrame.java index 80ab5d5..84e9087 100644 --- a/src/jalview/appletgui/AlignFrame.java +++ b/src/jalview/appletgui/AlignFrame.java @@ -364,18 +364,15 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, public boolean parseFeaturesFile(String file, String type, boolean autoenabledisplay) { - // TODO: test if importing a features file onto an alignment which already - // has features with links overwrites the original links. - - Hashtable featureLinks = new Hashtable(); boolean featuresFile = false; try { - featuresFile = new jalview.io.FeaturesFile(file, type).parse(viewport - .getAlignment(), alignPanel.seqPanel.seqCanvas - .getFeatureRenderer().getFeatureColours(), featureLinks, - true, viewport.applet.getDefaultParameter("relaxedidmatch", - false)); + Map colours = alignPanel.seqPanel.seqCanvas + .getFeatureRenderer().getFeatureColours(); + boolean relaxedIdMatching = viewport.applet.getDefaultParameter( + "relaxedidmatch", false); + featuresFile = new FeaturesFile(file, type).parse( + viewport.getAlignment(), colours, true, relaxedIdMatching); } catch (Exception ex) { ex.printStackTrace(); @@ -383,10 +380,6 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, if (featuresFile) { - if (featureLinks.size() > 0) - { - alignPanel.seqPanel.seqCanvas.getFeatureRenderer().featureLinks = featureLinks; - } if (autoenabledisplay) { viewport.setShowSequenceFeatures(true); @@ -1404,15 +1397,16 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, public String outputFeatures(boolean displayTextbox, String format) { String features; + FeaturesFile formatter = new FeaturesFile(); if (format.equalsIgnoreCase("Jalview")) { - features = new FeaturesFile().printJalviewFormat(viewport + features = formatter.printJalviewFormat(viewport 
.getAlignment().getSequencesArray(), getDisplayedFeatureCols()); } else { - features = new FeaturesFile().printGFFFormat(viewport.getAlignment() + features = formatter.printGffFormat(viewport.getAlignment() .getSequencesArray(), getDisplayedFeatureCols()); } @@ -2609,6 +2603,7 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, } + @Override public void changeColour(ColourSchemeI cs) { @@ -3759,6 +3754,7 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, this.add(statusBar, BorderLayout.SOUTH); } + @Override public void setStatus(String string) { statusBar.setText(string); diff --git a/src/jalview/appletgui/CutAndPasteTransfer.java b/src/jalview/appletgui/CutAndPasteTransfer.java index 70a7319..393506c 100644 --- a/src/jalview/appletgui/CutAndPasteTransfer.java +++ b/src/jalview/appletgui/CutAndPasteTransfer.java @@ -223,7 +223,7 @@ public class CutAndPasteTransfer extends Panel implements ActionListener, { AlignmentI al = null; - String format = new IdentifyFile().Identify(text, + String format = new IdentifyFile().identify(text, AppletFormatAdapter.PASTE); AppletFormatAdapter afa = new AppletFormatAdapter(alignFrame.alignPanel); try diff --git a/src/jalview/appletgui/FeatureRenderer.java b/src/jalview/appletgui/FeatureRenderer.java index 4655ba5..8318ee3 100644 --- a/src/jalview/appletgui/FeatureRenderer.java +++ b/src/jalview/appletgui/FeatureRenderer.java @@ -43,7 +43,7 @@ import java.awt.TextArea; import java.awt.TextField; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; -import java.util.Hashtable; +import java.util.Map; /** * DOCUMENT ME! @@ -57,7 +57,7 @@ public class FeatureRenderer extends // Holds web links for feature groups and feature types // in the form label|link - Hashtable featureLinks = null; + Map featureLinks = null; /** * Creates a new FeatureRenderer object. 
@@ -154,6 +154,7 @@ public class FeatureRenderer extends super(null); } + @Override public void paint(Graphics g) { Dimension d = getSize(); @@ -227,6 +228,7 @@ public class FeatureRenderer extends overlaps.addItemListener(new java.awt.event.ItemListener() { + @Override public void itemStateChanged(java.awt.event.ItemEvent e) { int index = overlaps.getSelectedIndex(); @@ -344,6 +346,7 @@ public class FeatureRenderer extends dialog.buttonPanel.add(deleteButton, 1); deleteButton.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent evt) { deleteFeature = true; @@ -370,6 +373,7 @@ public class FeatureRenderer extends // TODO: render the graduated color in the box. colourPanel.addMouseListener(new java.awt.event.MouseAdapter() { + @Override public void mousePressed(java.awt.event.MouseEvent evt) { if (!colourPanel.isGcol) diff --git a/src/jalview/bin/Jalview.java b/src/jalview/bin/Jalview.java index 462f5a7..8fe3bca 100755 --- a/src/jalview/bin/Jalview.java +++ b/src/jalview/bin/Jalview.java @@ -377,7 +377,7 @@ public class Jalview protocol = jalview.io.AppletFormatAdapter.checkProtocol(file); - format = new jalview.io.IdentifyFile().Identify(file, protocol); + format = new jalview.io.IdentifyFile().identify(file, protocol); AlignFrame af = fileLoader.LoadFileWaitTillLoaded(file, protocol, format); @@ -627,7 +627,7 @@ public class Jalview } else { - format = new jalview.io.IdentifyFile().Identify(file, protocol); + format = new jalview.io.IdentifyFile().identify(file, protocol); } startUpAlframe = fileLoader.LoadFileWaitTillLoaded(file, protocol, diff --git a/src/jalview/bin/JalviewLite.java b/src/jalview/bin/JalviewLite.java index 36a7cff..ae84ba5 100644 --- a/src/jalview/bin/JalviewLite.java +++ b/src/jalview/bin/JalviewLite.java @@ -850,7 +850,7 @@ public class JalviewLite extends Applet implements { AlignmentI al = null; - String format = new IdentifyFile().Identify(text, + String format = new IdentifyFile().identify(text, 
AppletFormatAdapter.PASTE); try { @@ -1967,7 +1967,7 @@ public class JalviewLite extends Applet implements return null; } String resolvedFile = resolveFileProtocol(fileParam); - String format = new IdentifyFile().Identify(resolvedFile, protocol); + String format = new IdentifyFile().identify(resolvedFile, protocol); dbgMsg("File identified as '" + format + "'"); AlignmentI al = null; try diff --git a/src/jalview/bin/JalviewLiteURLRetrieve.java b/src/jalview/bin/JalviewLiteURLRetrieve.java index 6be1016..fd88028 100644 --- a/src/jalview/bin/JalviewLiteURLRetrieve.java +++ b/src/jalview/bin/JalviewLiteURLRetrieve.java @@ -113,7 +113,7 @@ public class JalviewLiteURLRetrieve extends Applet String format = getParameter("format"); if (format == null || format.length() == 0) { - format = new jalview.io.IdentifyFile().Identify(file, protocol); + format = new jalview.io.IdentifyFile().identify(file, protocol); System.out.println("Format is " + format); } else diff --git a/src/jalview/controller/AlignViewController.java b/src/jalview/controller/AlignViewController.java index a39dc80..e4f11e0 100644 --- a/src/jalview/controller/AlignViewController.java +++ b/src/jalview/controller/AlignViewController.java @@ -394,7 +394,7 @@ public class AlignViewController implements AlignViewControllerI boolean featuresFile = false; try { - featuresFile = new FeaturesFile(file, protocol).parse(viewport + featuresFile = new FeaturesFile(false, file, protocol).parse(viewport .getAlignment().getDataset(), alignPanel.getFeatureRenderer() .getFeatureColours(), false, relaxedIdMatching); } catch (Exception ex) diff --git a/src/jalview/datamodel/SequenceDummy.java b/src/jalview/datamodel/SequenceDummy.java index 7e3c187..afe2961 100644 --- a/src/jalview/datamodel/SequenceDummy.java +++ b/src/jalview/datamodel/SequenceDummy.java @@ -20,7 +20,7 @@ */ package jalview.datamodel; -public class SequenceDummy extends Sequence implements SequenceI +public class SequenceDummy extends Sequence { public 
SequenceDummy(String sequenceId) { diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 1b6498f..5fadb6f 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -20,7 +20,8 @@ */ package jalview.datamodel; -import java.util.Hashtable; +import java.util.HashMap; +import java.util.Map; import java.util.Vector; /** @@ -41,7 +42,7 @@ public class SequenceFeature public String description; - public Hashtable otherDetails; + public Map otherDetails; public Vector links; @@ -54,9 +55,9 @@ public class SequenceFeature } /** - * Constructs a duplicate feature. Note: Uses clone on the otherDetails so - * only shallow copies are made of additional properties and method will - * silently fail if unclonable objects are found in the hash. + * Constructs a duplicate feature. Note: Makes a shallow copy of the + * otherDetails map, so the new and original SequenceFeature may reference the + * same objects in the map. * * @param cpy */ @@ -83,10 +84,11 @@ public class SequenceFeature { try { - otherDetails = (Hashtable) cpy.otherDetails.clone(); + otherDetails = (Map) ((HashMap) cpy.otherDetails) + .clone(); } catch (Exception e) { - // Uncloneable objects in the otherDetails - don't complain + // ignore } } if (cpy.links != null && cpy.links.size() > 0) @@ -248,6 +250,20 @@ public class SequenceFeature } /** + * Returns a property value for the given key if known, else the specified + * default value + * + * @param key + * @param defaultValue + * @return + */ + public Object getValue(String key, Object defaultValue) + { + Object value = getValue(key); + return value == null ? defaultValue : value; + } + + /** * Used for setting values which are not in the basic set. 
eg STRAND, FRAME * for GFF file * @@ -262,7 +278,7 @@ public class SequenceFeature { if (otherDetails == null) { - otherDetails = new Hashtable(); + otherDetails = new HashMap(); } otherDetails.put(key, value); diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 5c29b9b..4e5083d 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -845,6 +845,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * operation that affects the data in the current view (selection changed, * etc) to update the menus to reflect the new state. */ + @Override public void setMenusForViewport() { setMenusFromViewport(viewport); @@ -1401,6 +1402,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, alignPanel.makeEPS(f); } + @Override public void createSVG(File f) { alignPanel.makeSVG(f); @@ -1574,6 +1576,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } } + @Override public void addHistoryItem(CommandI command) { if (command.getSize() > 0) @@ -1977,7 +1980,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, return; } - format = new IdentifyFile().Identify(str, "Paste"); + format = new IdentifyFile().identify(str, "Paste"); } catch (OutOfMemoryError er) { @@ -3497,6 +3500,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * @param cs * DOCUMENT ME! 
*/ + @Override public void changeColour(ColourSchemeI cs) { // TODO: pull up to controller method @@ -5097,7 +5101,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, String type = null; try { - type = new IdentifyFile().Identify(file, protocol); + type = new IdentifyFile().identify(file, protocol); } catch (Exception ex) { type = null; @@ -5196,7 +5200,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, /** * Attempt to load a "dropped" file or URL string: First by testing whether - * it's and Annotation file, then a JNet file, and finally a features file. If + * it's an Annotation file, then a JNet file, and finally a features file. If * all are false then the user may have dropped an alignment file onto this * AlignFrame. * @@ -5210,7 +5214,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { if (protocol == null) { - protocol = jalview.io.FormatAdapter.checkProtocol(file); + protocol = FormatAdapter.checkProtocol(file); } // if the file isn't identified, or not positively identified as some // other filetype (PFAM is default unidentified alignment file type) then @@ -5271,7 +5275,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, // try to parse it as a features file if (format == null) { - format = new IdentifyFile().Identify(file, protocol); + format = new IdentifyFile().identify(file, protocol); } if (format.equalsIgnoreCase("JnetFile")) { @@ -5287,42 +5291,17 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, viewport.setColumnSelection(cs); isAnnotation = true; } - else + else if (IdentifyFile.FeaturesFile.equals(format)) { - /* - * if (format.equalsIgnoreCase("PDB")) { - * - * String pdbfn = ""; // try to match up filename with sequence id - * try { if (protocol == jalview.io.FormatAdapter.FILE) { File fl = - * new File(file); pdbfn = fl.getName(); } else if (protocol == - * jalview.io.FormatAdapter.URL) { URL url = 
new URL(file); pdbfn = - * url.getFile(); } } catch (Exception e) { } ; if (assocSeq == - * null) { SequenceIdMatcher idm = new SequenceIdMatcher(viewport - * .getAlignment().getSequencesArray()); if (pdbfn.length() > 0) { - * // attempt to find a match in the alignment SequenceI mtch = - * idm.findIdMatch(pdbfn); int l = 0, c = pdbfn.indexOf("."); while - * (mtch == null && c != -1) { while ((c = pdbfn.indexOf(".", l)) > - * l) { l = c; } if (l > -1) { pdbfn = pdbfn.substring(0, l); } mtch - * = idm.findIdMatch(pdbfn); } if (mtch != null) { // try and - * associate // prompt ? PDBEntry pe = new AssociatePdbFileWithSeq() - * .associatePdbWithSeq(file, protocol, mtch, true); if (pe != null) - * { System.err.println("Associated file : " + file + " with " + - * mtch.getDisplayId(true)); alignPanel.paintAlignment(true); } } // - * TODO: maybe need to load as normal otherwise return; } } - */ - // try to parse it as a features file - boolean isGroupsFile = parseFeaturesFile(file, protocol); - // if it wasn't a features file then we just treat it as a general - // alignment file to load into the current view. - if (!isGroupsFile) + if (parseFeaturesFile(file, protocol)) { - new FileLoader().LoadFile(viewport, file, protocol, format); + alignPanel.paintAlignment(true); } + } else { - alignPanel.paintAlignment(true); + new FileLoader().LoadFile(viewport, file, protocol, format); } - } } } if (isAnnotation) @@ -5344,7 +5323,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } catch (Exception x) { } - ; new OOMWarning( "loading data " + (protocol != null ? (protocol.equals(FormatAdapter.PASTE) ? "from clipboard." 
@@ -5755,6 +5733,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, viewport.firePropertyChange("alignment", null, al); } + @Override public void setShowSeqFeatures(boolean b) { showSeqFeatures.setSelected(b); diff --git a/src/jalview/gui/AnnotationExporter.java b/src/jalview/gui/AnnotationExporter.java index 643d8a0..136a38e 100644 --- a/src/jalview/gui/AnnotationExporter.java +++ b/src/jalview/gui/AnnotationExporter.java @@ -21,6 +21,7 @@ package jalview.gui; import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.SequenceI; import jalview.io.AnnotationFile; import jalview.io.FeaturesFile; import jalview.io.JalviewFileChooser; @@ -32,6 +33,7 @@ import java.awt.Color; import java.awt.FlowLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.util.Map; import javax.swing.BorderFactory; import javax.swing.ButtonGroup; @@ -153,17 +155,21 @@ public class AnnotationExporter extends JPanel .getString("label.no_features_on_alignment"); if (features) { + FeaturesFile formatter = new FeaturesFile(); + SequenceI[] sequences = ap.av.getAlignment().getDataset() + .getSequencesArray(); + Map featureColours = ap.getFeatureRenderer() + .getDisplayedFeatureCols(); + boolean includeNonPositional = ap.av.isShowNPFeats(); if (GFFFormat.isSelected()) { - text = new FeaturesFile().printGFFFormat(ap.av.getAlignment() - .getDataset().getSequencesArray(), ap.getFeatureRenderer() - .getDisplayedFeatureCols(), true, ap.av.isShowNPFeats());// ap.av.featuresDisplayed//); + text = formatter.printGffFormat(sequences, featureColours, true, + includeNonPositional); } else { - text = new FeaturesFile().printJalviewFormat(ap.av.getAlignment() - .getDataset().getSequencesArray(), ap.getFeatureRenderer() - .getDisplayedFeatureCols(), true, ap.av.isShowNPFeats()); // ap.av.featuresDisplayed); + text = formatter.printJalviewFormat(sequences, featureColours, + true, includeNonPositional); } } else @@ -236,6 +242,7 @@ 
public class AnnotationExporter extends JPanel toFile.setText(MessageManager.getString("label.to_file")); toFile.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { toFile_actionPerformed(e); @@ -244,6 +251,7 @@ public class AnnotationExporter extends JPanel toTextbox.setText(MessageManager.getString("label.to_textbox")); toTextbox.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { toTextbox_actionPerformed(e); @@ -252,6 +260,7 @@ public class AnnotationExporter extends JPanel close.setText(MessageManager.getString("action.close")); close.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { close_actionPerformed(e); diff --git a/src/jalview/gui/CutAndPasteTransfer.java b/src/jalview/gui/CutAndPasteTransfer.java index 4541fc2..ff3e0f2 100644 --- a/src/jalview/gui/CutAndPasteTransfer.java +++ b/src/jalview/gui/CutAndPasteTransfer.java @@ -206,7 +206,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer return; } - String format = new IdentifyFile().Identify(text, "Paste"); + String format = new IdentifyFile().identify(text, "Paste"); if (format == null || format.equalsIgnoreCase("EMPTY DATA FILE")) { System.err.println(MessageManager diff --git a/src/jalview/gui/Desktop.java b/src/jalview/gui/Desktop.java index d3b8afc..a10ec4e 100644 --- a/src/jalview/gui/Desktop.java +++ b/src/jalview/gui/Desktop.java @@ -631,7 +631,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements String file = (String) contents .getTransferData(DataFlavor.stringFlavor); - String format = new IdentifyFile().Identify(file, + String format = new IdentifyFile().identify(file, FormatAdapter.PASTE); new FileLoader().LoadFile(file, FormatAdapter.PASTE, format); @@ -977,7 +977,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements } else { - format = new IdentifyFile().Identify(file, protocol); + format = new 
IdentifyFile().identify(file, protocol); } new FileLoader().LoadFile(file, protocol, format); @@ -1028,7 +1028,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements } else { - format = new IdentifyFile().Identify(choice, FormatAdapter.FILE); + format = new IdentifyFile().identify(choice, FormatAdapter.FILE); } if (viewport != null) @@ -1111,7 +1111,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements } else { - String format = new IdentifyFile().Identify(url, FormatAdapter.URL); + String format = new IdentifyFile().identify(url, FormatAdapter.URL); if (format.equals("URL NOT FOUND")) { diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index 50a8167..7258b63 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -761,10 +761,11 @@ public class Jalview2XML if (sf[index].otherDetails != null) { String key; - Enumeration keys = sf[index].otherDetails.keys(); - while (keys.hasMoreElements()) + Iterator keys = sf[index].otherDetails.keySet() + .iterator(); + while (keys.hasNext()) { - key = keys.nextElement().toString(); + key = keys.next(); OtherData keyValue = new OtherData(); keyValue.setKey(key); keyValue.setValue(sf[index].otherDetails.get(key).toString()); @@ -2178,6 +2179,7 @@ public class Jalview2XML { SwingUtilities.invokeAndWait(new Runnable() { + @Override public void run() { setLoadingFinishedForNewStructureViewers(); diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index a973757..207d2bc 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -790,7 +790,7 @@ public class SequenceFetcher extends JPanel implements Runnable AlignmentI parseResult(String result, String title) { - String format = new IdentifyFile().Identify(result, "Paste"); + String format = new IdentifyFile().identify(result, "Paste"); AlignmentI sequences = null; if (FormatAdapter.isValidFormat(format)) { diff --git 
a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java index 239c531..e34093d 100755 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -86,7 +86,7 @@ public class AppletFormatAdapter public static final String[] READABLE_FORMATS = new String[] { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC, JSONFile.FILE_DESC, - IdentifyFile.GFF3File, "HTML" }; + IdentifyFile.FeaturesFile, "HTML" }; /** * List of readable format file extensions by application in order @@ -103,7 +103,7 @@ public class AppletFormatAdapter */ public static final String[] READABLE_FNAMES = new String[] { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Stockholm", "RNAML", - PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.GFF3File, + PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.FeaturesFile, "Jalview", HtmlFile.FILE_DESC }; /** @@ -306,9 +306,9 @@ public class AppletFormatAdapter { alignFile = new RnamlFile(inFile, type); } - else if (format.equals(IdentifyFile.GFF3File)) + else if (format.equals(IdentifyFile.FeaturesFile)) { - alignFile = new Gff3File(inFile, type); + alignFile = new FeaturesFile(true, inFile, type); } return buildAlignmentFrom(alignFile); } catch (Exception e) @@ -426,9 +426,10 @@ public class AppletFormatAdapter { alignFile = new PhylipFile(source); } - else if (format.equals(IdentifyFile.GFF3File)) + else if (format.equals(IdentifyFile.FeaturesFile)) { - alignFile = new Gff3File(inFile, type); + // enforce 'parseImmediately' here: + alignFile = new FeaturesFile(true, inFile, type); } else if (format.equals(JSONFile.FILE_DESC)) { @@ -669,7 +670,7 @@ public class AppletFormatAdapter long memf = -r.totalMemory() + r.freeMemory(); long t1 = -System.currentTimeMillis(); AlignmentI al = afa.readFile(args[i], FILE, - new IdentifyFile().Identify(args[i], FILE)); + new IdentifyFile().identify(args[i], FILE)); t1 += 
System.currentTimeMillis(); System.gc(); memf += r.totalMemory() - r.freeMemory(); @@ -835,7 +836,7 @@ public class AppletFormatAdapter { try { - String idformat = new jalview.io.IdentifyFile().Identify(file, + String idformat = new jalview.io.IdentifyFile().identify(file, protocol); if (idformat == null) { diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index a670e6e..ee6ba11 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -21,7 +21,9 @@ package jalview.io; import jalview.analysis.SequenceIdMatcher; +import jalview.api.AlignViewportI; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; @@ -31,36 +33,55 @@ import jalview.schemes.GraduatedColor; import jalview.schemes.UserColourScheme; import jalview.util.Format; import jalview.util.MapList; +import jalview.util.ParseHtmlBodyAndLinks; +import jalview.util.StringUtils; +import java.awt.Color; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.StringTokenizer; -import java.util.Vector; /** - * Parse and create Jalview Features files Detects GFF format features files and - * parses. Does not implement standard print() - call specific printFeatures or - * printGFF. Uses AlignmentI.findSequence(String id) to find the sequence object - * for the features annotation - this normally works on an exact match. + * Parses and writes features files, which may be in Jalview, GFF2 or GFF3 + * format. These are tab-delimited formats but with differences in the use of + * columns. 
+ * + * A Jalview feature file may define feature colours and then declare that the + * remainder of the file is in GFF format with the line 'GFF'. + * + * GFF3 files may include alignment mappings for features, which Jalview will + * attempt to model, and may include sequence data following a ##FASTA line. + * * * @author AMW - * @version $Revision$ + * @author jbprocter + * @author gmcarstairs */ public class FeaturesFile extends AlignFile { - /** - * work around for GFF interpretation bug where source string becomes - * description rather than a group - */ - private boolean doGffSource = true; + protected static final String STRAND = "STRAND"; + + protected static final String FRAME = "FRAME"; + + protected static final String ATTRIBUTES = "ATTRIBUTES"; + + protected static final String TAB = "\t"; - private int gffversion; + protected static final String GFF_VERSION = "##gff-version"; + + private AlignmentI lastmatchedAl = null; + + private SequenceIdMatcher matcher = null; + + protected AlignmentI dataset; + + protected int gffVersion; /** * Creates a new FeaturesFile object. 
@@ -70,13 +91,15 @@ public class FeaturesFile extends AlignFile } /** + * Constructor which does not parse the file immediately + * * @param inFile * @param type * @throws IOException */ public FeaturesFile(String inFile, String type) throws IOException { - super(inFile, type); + super(false, inFile, type); } /** @@ -89,17 +112,8 @@ public class FeaturesFile extends AlignFile } /** - * @param parseImmediately - * @param source - * @throws IOException - */ - public FeaturesFile(boolean parseImmediately, FileParse source) - throws IOException - { - super(parseImmediately, source); - } - - /** + * Constructor that optionally parses the file immediately + * * @param parseImmediately * @param inFile * @param type @@ -123,562 +137,122 @@ public class FeaturesFile extends AlignFile * - process html strings into plain text * @return true if features were added */ - public boolean parse(AlignmentI align, Map colours, boolean removeHTML) - { - return parse(align, colours, null, removeHTML, false); - } - - /** - * Parse GFF or sequence features file optionally using case-independent - * matching, discarding URLs - * - * @param align - * - alignment/dataset containing sequences that are to be annotated - * @param colours - * - hashtable to store feature colour definitions - * @param removeHTML - * - process html strings into plain text - * @param relaxedIdmatching - * - when true, ID matches to compound sequence IDs are allowed - * @return true if features were added - */ - public boolean parse(AlignmentI align, Map colours, boolean removeHTML, - boolean relaxedIdMatching) + public boolean parse(AlignmentI align, Map colours, + boolean removeHTML) { - return parse(align, colours, null, removeHTML, relaxedIdMatching); + return parse(align, colours, removeHTML, false); } /** - * Parse GFF or sequence features file optionally using case-independent - * matching - * - * @param align - * - alignment/dataset containing sequences that are to be annotated - * @param colours - * - 
hashtable to store feature colour definitions - * @param featureLink - * - hashtable to store associated URLs - * @param removeHTML - * - process html strings into plain text - * @return true if features were added + * Extends the default addProperties by also adding peptide-to-cDNA mappings + * (if any) derived while parsing a GFF file */ - public boolean parse(AlignmentI align, Map colours, Map featureLink, - boolean removeHTML) - { - return parse(align, colours, featureLink, removeHTML, false); - } - - @Override - public void addAnnotations(AlignmentI al) - { - super.addAnnotations(al); - } - @Override public void addProperties(AlignmentI al) { super.addProperties(al); - } - - @Override - public void addSeqGroups(AlignmentI al) - { - super.addSeqGroups(al); + if (dataset != null && dataset.getCodonFrames() != null) + { + AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset(); + for (AlignedCodonFrame codons : dataset.getCodonFrames()) + { + ds.addCodonFrame(codons); + } + } } /** - * Parse GFF or sequence features file + * Parse GFF or Jalview format sequence features file * * @param align * - alignment/dataset containing sequences that are to be annotated * @param colours * - hashtable to store feature colour definitions - * @param featureLink - * - hashtable to store associated URLs * @param removeHTML * - process html strings into plain text * @param relaxedIdmatching * - when true, ID matches to compound sequence IDs are allowed * @return true if features were added */ - public boolean parse(AlignmentI align, Map colours, Map featureLink, + public boolean parse(AlignmentI align, Map colours, boolean removeHTML, boolean relaxedIdmatching) { + Map gffProps = new HashMap(); + /* + * keep track of any sequences we try to create from the data + */ + List newseqs = new ArrayList(); String line = null; try { - SequenceI seq = null; - /** - * keep track of any sequences we try to create from the data if it is a - * GFF3 file - */ - ArrayList newseqs = new 
ArrayList(); - String type, desc, token = null; - - int index, start, end; - float score; StringTokenizer st; - SequenceFeature sf; - String featureGroup = null, groupLink = null; - Map typeLink = new Hashtable(); - /** - * when true, assume GFF style features rather than Jalview style. - */ - boolean GFFFile = true; - Map gffProps = new HashMap(); + String featureGroup = null; + while ((line = nextLine()) != null) { // skip comments/process pragmas - if (line.startsWith("#")) + if (line.length() == 0 || line.startsWith("#")) { - if (line.startsWith("##")) + if (line.toLowerCase().startsWith("##")) { - // possibly GFF2/3 version and metadata header processGffPragma(line, gffProps, align, newseqs); - line = ""; } continue; } - st = new StringTokenizer(line, "\t"); + st = new StringTokenizer(line, TAB); if (st.countTokens() == 1) { if (line.trim().equalsIgnoreCase("GFF")) { - // Start parsing file as if it might be GFF again. - GFFFile = true; + /* + * Jalview features file with appendded GFF + * assume GFF2 (though it may declare gff-version 3) + */ + gffVersion = 2; continue; } } + if (st.countTokens() > 1 && st.countTokens() < 4) { - GFFFile = false; - type = st.nextToken(); - if (type.equalsIgnoreCase("startgroup")) + /* + * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or + * a feature type colour specification; not GFF format + */ + String ft = st.nextToken(); + if (ft.equalsIgnoreCase("startgroup")) { featureGroup = st.nextToken(); - if (st.hasMoreElements()) - { - groupLink = st.nextToken(); - featureLink.put(featureGroup, groupLink); - } } - else if (type.equalsIgnoreCase("endgroup")) + else if (ft.equalsIgnoreCase("endgroup")) { // We should check whether this is the current group, // but at present theres no way of showing more than 1 group st.nextToken(); featureGroup = null; - groupLink = null; } else { - Object colour = null; - String colscheme = st.nextToken(); - if (colscheme.indexOf("|") > -1 - || 
colscheme.trim().equalsIgnoreCase("label")) - { - // Parse '|' separated graduated colourscheme fields: - // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue] - // can either provide 'label' only, first is optional, next two - // colors are required (but may be - // left blank), next is optional, nxt two min/max are required. - // first is either 'label' - // first/second and third are both hexadecimal or word equivalent - // colour. - // next two are values parsed as floats. - // fifth is either 'above','below', or 'none'. - // sixth is a float value and only required when fifth is either - // 'above' or 'below'. - StringTokenizer gcol = new StringTokenizer(colscheme, "|", - true); - // set defaults - int threshtype = AnnotationColourGradient.NO_THRESHOLD; - float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN; - boolean labelCol = false; - // Parse spec line - String mincol = gcol.nextToken(); - if (mincol == "|") - { - System.err - .println("Expected either 'label' or a colour specification in the line: " - + line); - continue; - } - String maxcol = null; - if (mincol.toLowerCase().indexOf("label") == 0) - { - labelCol = true; - mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip - // '|' - mincol = (gcol.hasMoreTokens() ? 
gcol.nextToken() : null); - } - String abso = null, minval, maxval; - if (mincol != null) - { - // at least four more tokens - if (mincol.equals("|")) - { - mincol = ""; - } - else - { - gcol.nextToken(); // skip next '|' - } - // continue parsing rest of line - maxcol = gcol.nextToken(); - if (maxcol.equals("|")) - { - maxcol = ""; - } - else - { - gcol.nextToken(); // skip next '|' - } - abso = gcol.nextToken(); - gcol.nextToken(); // skip next '|' - if (abso.toLowerCase().indexOf("abso") != 0) - { - minval = abso; - abso = null; - } - else - { - minval = gcol.nextToken(); - gcol.nextToken(); // skip next '|' - } - maxval = gcol.nextToken(); - if (gcol.hasMoreTokens()) - { - gcol.nextToken(); // skip next '|' - } - try - { - if (minval.length() > 0) - { - min = new Float(minval).floatValue(); - } - } catch (Exception e) - { - System.err - .println("Couldn't parse the minimum value for graduated colour for type (" - + colscheme - + ") - did you misspell 'auto' for the optional automatic colour switch ?"); - e.printStackTrace(); - } - try - { - if (maxval.length() > 0) - { - max = new Float(maxval).floatValue(); - } - } catch (Exception e) - { - System.err - .println("Couldn't parse the maximum value for graduated colour for type (" - + colscheme + ")"); - e.printStackTrace(); - } - } - else - { - // add in some dummy min/max colours for the label-only - // colourscheme. 
- mincol = "FFFFFF"; - maxcol = "000000"; - } - try - { - colour = new jalview.schemes.GraduatedColor( - new UserColourScheme(mincol).findColour('A'), - new UserColourScheme(maxcol).findColour('A'), min, - max); - } catch (Exception e) - { - System.err - .println("Couldn't parse the graduated colour scheme (" - + colscheme + ")"); - e.printStackTrace(); - } - if (colour != null) - { - ((jalview.schemes.GraduatedColor) colour) - .setColourByLabel(labelCol); - ((jalview.schemes.GraduatedColor) colour) - .setAutoScaled(abso == null); - // add in any additional parameters - String ttype = null, tval = null; - if (gcol.hasMoreTokens()) - { - // threshold type and possibly a threshold value - ttype = gcol.nextToken(); - if (ttype.toLowerCase().startsWith("below")) - { - ((jalview.schemes.GraduatedColor) colour) - .setThreshType(AnnotationColourGradient.BELOW_THRESHOLD); - } - else if (ttype.toLowerCase().startsWith("above")) - { - ((jalview.schemes.GraduatedColor) colour) - .setThreshType(AnnotationColourGradient.ABOVE_THRESHOLD); - } - else - { - ((jalview.schemes.GraduatedColor) colour) - .setThreshType(AnnotationColourGradient.NO_THRESHOLD); - if (!ttype.toLowerCase().startsWith("no")) - { - System.err - .println("Ignoring unrecognised threshold type : " - + ttype); - } - } - } - if (((GraduatedColor) colour).getThreshType() != AnnotationColourGradient.NO_THRESHOLD) - { - try - { - gcol.nextToken(); - tval = gcol.nextToken(); - ((jalview.schemes.GraduatedColor) colour) - .setThresh(new Float(tval).floatValue()); - } catch (Exception e) - { - System.err - .println("Couldn't parse threshold value as a float: (" - + tval + ")"); - e.printStackTrace(); - } - } - // parse the thresh-is-min token ? 
- if (gcol.hasMoreTokens()) - { - System.err - .println("Ignoring additional tokens in parameters in graduated colour specification\n"); - while (gcol.hasMoreTokens()) - { - System.err.println("|" + gcol.nextToken()); - } - System.err.println("\n"); - } - } - } - else - { - UserColourScheme ucs = new UserColourScheme(colscheme); - colour = ucs.findColour('A'); - } - if (colour != null) - { - colours.put(type, colour); - } - if (st.hasMoreElements()) - { - String link = st.nextToken(); - typeLink.put(type, link); - if (featureLink == null) - { - featureLink = new Hashtable(); - } - featureLink.put(type, link); - } + parseFeatureColour(line, ft, st, colours); } continue; } - String seqId = ""; - while (st.hasMoreElements()) - { - - if (GFFFile) - { - // Still possible this is an old Jalview file, - // which does not have type colours at the beginning - seqId = token = st.nextToken(); - seq = findName(align, seqId, relaxedIdmatching, newseqs); - if (seq != null) - { - desc = st.nextToken(); - String group = null; - if (doGffSource && desc.indexOf(' ') == -1) - { - // could also be a source term rather than description line - group = new String(desc); - } - type = st.nextToken(); - try - { - String stt = st.nextToken(); - if (stt.length() == 0 || stt.equals("-")) - { - start = 0; - } - else - { - start = Integer.parseInt(stt); - } - } catch (NumberFormatException ex) - { - start = 0; - } - try - { - String stt = st.nextToken(); - if (stt.length() == 0 || stt.equals("-")) - { - end = 0; - } - else - { - end = Integer.parseInt(stt); - } - } catch (NumberFormatException ex) - { - end = 0; - } - // TODO: decide if non positional feature assertion for input data - // where end==0 is generally valid - if (end == 0) - { - // treat as non-positional feature, regardless. 
- start = 0; - } - try - { - score = new Float(st.nextToken()).floatValue(); - } catch (NumberFormatException ex) - { - score = 0; - } - - sf = new SequenceFeature(type, desc, start, end, score, group); - - try - { - sf.setValue("STRAND", st.nextToken()); - sf.setValue("FRAME", st.nextToken()); - } catch (Exception ex) - { - } - - if (st.hasMoreTokens()) - { - StringBuffer attributes = new StringBuffer(); - boolean sep = false; - while (st.hasMoreTokens()) - { - attributes.append((sep ? "\t" : "") + st.nextElement()); - sep = true; - } - // TODO validate and split GFF2 attributes field ? parse out - // ([A-Za-z][A-Za-z0-9_]*) ; and add as - // sf.setValue(attrib, val); - sf.setValue("ATTRIBUTES", attributes.toString()); - } - - if (processOrAddSeqFeature(align, newseqs, seq, sf, GFFFile, - relaxedIdmatching)) - { - // check whether we should add the sequence feature to any other - // sequences in the alignment with the same or similar - while ((seq = align.findName(seq, seqId, true)) != null) - { - seq.addSequenceFeature(new SequenceFeature(sf)); - } - } - break; - } - } - - if (GFFFile && seq == null) - { - desc = token; - } - else - { - desc = st.nextToken(); - } - if (!st.hasMoreTokens()) - { - System.err - .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up."); - // in all probability, this isn't a file we understand, so bail - // quietly. 
- return false; - } - - token = st.nextToken(); - - if (!token.equals("ID_NOT_SPECIFIED")) - { - seq = findName(align, seqId = token, relaxedIdmatching, null); - st.nextToken(); - } - else - { - seqId = null; - try - { - index = Integer.parseInt(st.nextToken()); - seq = align.getSequenceAt(index); - } catch (NumberFormatException ex) - { - seq = null; - } - } - - if (seq == null) - { - System.out.println("Sequence not found: " + line); - break; - } - - start = Integer.parseInt(st.nextToken()); - end = Integer.parseInt(st.nextToken()); - - type = st.nextToken(); - - if (!colours.containsKey(type)) - { - // Probably the old style groups file - UserColourScheme ucs = new UserColourScheme(type); - colours.put(type, ucs.findColour('A')); - } - sf = new SequenceFeature(type, desc, "", start, end, featureGroup); - if (st.hasMoreTokens()) - { - try - { - score = new Float(st.nextToken()).floatValue(); - // update colourgradient bounds if allowed to - } catch (NumberFormatException ex) - { - score = 0; - } - sf.setScore(score); - } - if (groupLink != null && removeHTML) - { - sf.addLink(groupLink); - sf.description += "%LINK%"; - } - if (typeLink.containsKey(type) && removeHTML) - { - sf.addLink(typeLink.get(type).toString()); - sf.description += "%LINK%"; - } - - parseDescriptionHTML(sf, removeHTML); - seq.addSequenceFeature(sf); - - while (seqId != null - && (seq = align.findName(seq, seqId, false)) != null) - { - seq.addSequenceFeature(new SequenceFeature(sf)); - } - // If we got here, its not a GFFFile - GFFFile = false; + /* + * if not a comment, GFF pragma, startgroup, endgroup or feature + * colour specification, that just leaves a feature details line + * in either Jalview or GFF format + */ + if (gffVersion == 0) + { + parseJalviewFeature(line, st, align, colours, removeHTML, + relaxedIdmatching, featureGroup); + } + else + { + parseGffFeature(st, align, relaxedIdmatching, newseqs); } } resetMatcher(); @@ -696,417 +270,351 @@ public class FeaturesFile extends 
AlignFile return true; } - private enum GffPragmas - { - gff_version, sequence_region, feature_ontology, attribute_ontology, source_ontology, species_build, fasta, hash - }; - - private static Map GFFPRAGMA; - static + /** + * Try to parse a Jalview format feature specification. Returns true if + * successful or false if not. + * + * @param line + * @param st + * @param alignment + * @param featureColours + * @param removeHTML + * @param relaxedIdmatching + * @param featureGroup + */ + protected boolean parseJalviewFeature(String line, StringTokenizer st, + AlignmentI alignment, Map featureColours, + boolean removeHTML, boolean relaxedIdmatching, String featureGroup) { - GFFPRAGMA = new HashMap(); - GFFPRAGMA.put("gff-version", GffPragmas.gff_version); - GFFPRAGMA.put("sequence-region", GffPragmas.sequence_region); - GFFPRAGMA.put("feature-ontology", GffPragmas.feature_ontology); - GFFPRAGMA.put("#", GffPragmas.hash); - GFFPRAGMA.put("fasta", GffPragmas.fasta); - GFFPRAGMA.put("species-build", GffPragmas.species_build); - GFFPRAGMA.put("source-ontology", GffPragmas.source_ontology); - GFFPRAGMA.put("attribute-ontology", GffPragmas.attribute_ontology); - } + /* + * Jalview: description seqid seqIndex start end type [score] + */ + String desc = st.nextToken(); + String seqId = st.nextToken(); + SequenceI seq = findName(alignment, seqId, relaxedIdmatching, null); + if (!st.hasMoreTokens()) + { + System.err + .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up."); + // in all probability, this isn't a file we understand, so bail + // quietly. + return false; + } - private void processGffPragma(String line, Map gffProps, - AlignmentI align, ArrayList newseqs) - throws IOException - { - // line starts with ## - int spacepos = line.indexOf(' '); - String pragma = spacepos == -1 ? 
line.substring(2).trim() : line - .substring(2, spacepos); - GffPragmas gffpragma = GFFPRAGMA.get(pragma.toLowerCase()); - if (gffpragma == null) + if (!seqId.equals("ID_NOT_SPECIFIED")) { - return; + seq = findName(alignment, seqId, relaxedIdmatching, null); + st.nextToken(); } - switch (gffpragma) + else { - case gff_version: + seqId = null; + seq = null; try { - gffversion = Integer.parseInt(line.substring(spacepos + 1)); - } finally + int idx = Integer.parseInt(st.nextToken()); + seq = alignment.getSequenceAt(idx); + } catch (NumberFormatException ex) { - + // continue } - break; - case feature_ontology: - // resolve against specific feature ontology - break; - case attribute_ontology: - // resolve against specific attribute ontology - break; - case source_ontology: - // resolve against specific source ontology - break; - case species_build: - // resolve against specific NCBI taxon version - break; - case hash: - // close off any open feature hierarchies - break; - case fasta: - // process the rest of the file as a fasta file and replace any dummy - // sequence IDs - process_as_fasta(align, newseqs); - break; - default: - // we do nothing ? 
- System.err.println("Ignoring unknown pragma:\n" + line); } - } - private void process_as_fasta(AlignmentI align, List newseqs) - throws IOException - { - try + if (seq == null) { - mark(); - } catch (IOException q) + System.out.println("Sequence not found: " + line); + return false; + } + + int startPos = Integer.parseInt(st.nextToken()); + int endPos = Integer.parseInt(st.nextToken()); + + String ft = st.nextToken(); + + if (!featureColours.containsKey(ft)) { + /* + * Perhaps an old style groups file with no colours - + * synthesize a colour from the feature type + */ + UserColourScheme ucs = new UserColourScheme(ft); + featureColours.put(ft, ucs.findColour('A')); } - FastaFile parser = new FastaFile(this); - List includedseqs = parser.getSeqs(); - SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs); - // iterate over includedseqs, and replacing matching ones with newseqs - // sequences. Generic iterator not used here because we modify includedseqs - // as we go - for (int p = 0, pSize = includedseqs.size(); p < pSize; p++) + SequenceFeature sf = new SequenceFeature(ft, desc, "", + startPos, endPos, featureGroup); + if (st.hasMoreTokens()) { - // search for any dummy seqs that this sequence can be used to update - SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p)); - if (dummyseq != null) + float score = 0f; + try { - // dummyseq was created so it could be annotated and referred to in - // alignments/codon mappings - - SequenceI mseq = includedseqs.get(p); - // mseq is the 'template' imported from the FASTA file which we'll use - // to coomplete dummyseq - if (dummyseq instanceof SequenceDummy) - { - // probably have the pattern wrong - // idea is that a flyweight proxy for a sequence ID can be created for - // 1. stable reference creation - // 2. addition of annotation - // 3. future replacement by a real sequence - // current pattern is to create SequenceDummy objects - a convenience - // constructor for a Sequence. 
- // problem is that when promoted to a real sequence, all references - // need - // to be updated somehow. - ((SequenceDummy) dummyseq).become(mseq); - includedseqs.set(p, dummyseq); // template is no longer needed - } + score = new Float(st.nextToken()).floatValue(); + // update colourgradient bounds if allowed to + } catch (NumberFormatException ex) + { + // leave as 0 } + sf.setScore(score); } - // finally add sequences to the dataset - for (SequenceI seq : includedseqs) + + parseDescriptionHTML(sf, removeHTML); + + seq.addSequenceFeature(sf); + + while (seqId != null + && (seq = alignment.findName(seq, seqId, false)) != null) { - align.addSequence(seq); + seq.addSequenceFeature(new SequenceFeature(sf)); } + return true; } /** - * take a sequence feature and examine its attributes to decide how it should - * be added to a sequence + * Process a feature type colour specification * - * @param seq - * - the destination sequence constructed or discovered in the - * current context - * @param sf - * - the base feature with ATTRIBUTES property containing any - * additional attributes - * @param gFFFile - * - true if we are processing a GFF annotation file - * @return true if sf was actually added to the sequence, false if it was - * processed in another way + * @param line + * the current input line (for error messages only) + * @param featureType + * the first token on the line + * @param st + * holds remaining tokens on the line + * @param colours + * map to which to add derived colour specification */ - public boolean processOrAddSeqFeature(AlignmentI align, - List newseqs, SequenceI seq, SequenceFeature sf, - boolean gFFFile, boolean relaxedIdMatching) + protected void parseFeatureColour(String line, String featureType, + StringTokenizer st, Map colours) { - String attr = (String) sf.getValue("ATTRIBUTES"); - boolean add = true; - if (gFFFile && attr != null) + Object colour = null; + String colscheme = st.nextToken(); + if (colscheme.indexOf("|") > -1 + || 
colscheme.trim().equalsIgnoreCase("label")) { - int nattr = 8; - - for (String attset : attr.split("\t")) - { - if (attset == null || attset.trim().length() == 0) - { - continue; - } - nattr++; - Map> set = new HashMap>(); - // normally, only expect one column - 9 - in this field - // the attributes (Gff3) or groups (gff2) field - for (String pair : attset.trim().split(";")) - { - pair = pair.trim(); - if (pair.length() == 0) - { - continue; - } - - // expect either space seperated (gff2) or '=' separated (gff3) - // key/value pairs here - - int eqpos = pair.indexOf('='), sppos = pair.indexOf(' '); - String key = null, value = null; - - if (sppos > -1 && (eqpos == -1 || sppos < eqpos)) - { - key = pair.substring(0, sppos); - value = pair.substring(sppos + 1); - } - else - { - if (eqpos > -1 && (sppos == -1 || eqpos < sppos)) - { - key = pair.substring(0, eqpos); - value = pair.substring(eqpos + 1); - } - else - { - key = pair; - } - } - if (key != null) - { - List vals = set.get(key); - if (vals == null) - { - vals = new ArrayList(); - set.put(key, vals); - } - if (value != null) - { - vals.add(value.trim()); - } - } - } - try - { - add &= processGffKey(set, nattr, seq, sf, align, newseqs, - relaxedIdMatching); // process decides if - // feature is actually - // added - } catch (InvalidGFF3FieldException ivfe) - { - System.err.println(ivfe); - } - } + colour = parseGraduatedColourScheme(line, colscheme); } - if (add) + else { - seq.addSequenceFeature(sf); + UserColourScheme ucs = new UserColourScheme(colscheme); + colour = ucs.findColour('A'); } - return add; - } - - public class InvalidGFF3FieldException extends Exception - { - String field, value; - - public InvalidGFF3FieldException(String field, - Map> set, String message) + if (colour != null) { - super(message + " (Field was " + field + " and value was " - + set.get(field).toString()); - this.field = field; - this.value = set.get(field).toString(); + colours.put(featureType, colour); } - } /** - * take a set 
of keys for a feature and interpret them + * Parse a Jalview graduated colour descriptor * - * @param set - * @param nattr - * @param seq - * @param sf + * @param line + * @param colourDescriptor * @return */ - public boolean processGffKey(Map> set, int nattr, - SequenceI seq, SequenceFeature sf, AlignmentI align, - List newseqs, boolean relaxedIdMatching) - throws InvalidGFF3FieldException + protected GraduatedColor parseGraduatedColourScheme(String line, + String colourDescriptor) { - String attr; - // decide how to interpret according to type - if (sf.getType().equals("similarity")) - { - int strand = sf.getStrand(); - // exonerate cdna/protein map - // look for fields - List querySeq = findNames(align, newseqs, - relaxedIdMatching, set.get(attr = "Query")); - if (querySeq == null || querySeq.size() != 1) - { - throw new InvalidGFF3FieldException(attr, set, - "Expecting exactly one sequence in Query field (got " - + set.get(attr) + ")"); - } - if (set.containsKey(attr = "Align")) - { - // process the align maps and create cdna/protein maps - // ideally, the query sequences are in the alignment, but maybe not... - - AlignedCodonFrame alco = new AlignedCodonFrame(); - MapList codonmapping = constructCodonMappingFromAlign(set, attr, - strand); - - // add codon mapping, and hope! - alco.addMap(seq, querySeq.get(0), codonmapping); - align.addCodonFrame(alco); - // everything that's needed to be done is done - // no features to create here ! - return false; - } - + // Parse '|' separated graduated colourscheme fields: + // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue] + // can either provide 'label' only, first is optional, next two + // colors are required (but may be + // left blank), next is optional, nxt two min/max are required. + // first is either 'label' + // first/second and third are both hexadecimal or word equivalent + // colour. + // next two are values parsed as floats. 
+ // fifth is either 'above','below', or 'none'. + // sixth is a float value and only required when fifth is either + // 'above' or 'below'. + StringTokenizer gcol = new StringTokenizer(colourDescriptor, "|", true); + // set defaults + float min = Float.MIN_VALUE, max = Float.MAX_VALUE; + boolean labelCol = false; + // Parse spec line + String mincol = gcol.nextToken(); + if (mincol == "|") + { + System.err + .println("Expected either 'label' or a colour specification in the line: " + + line); + return null; } - return true; - } - - private MapList constructCodonMappingFromAlign( - Map> set, String attr, int strand) - throws InvalidGFF3FieldException - { - if (strand == 0) + String maxcol = null; + if (mincol.toLowerCase().indexOf("label") == 0) { - throw new InvalidGFF3FieldException(attr, set, - "Invalid strand for a codon mapping (cannot be 0)"); + labelCol = true; + mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip '|' + mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); } - List fromrange = new ArrayList(), torange = new ArrayList(); - int lastppos = 0, lastpframe = 0; - for (String range : set.get(attr)) + String abso = null, minval, maxval; + if (mincol != null) { - List ints = new ArrayList(); - StringTokenizer st = new StringTokenizer(range, " "); - while (st.hasMoreTokens()) + // at least four more tokens + if (mincol.equals("|")) { - String num = st.nextToken(); - try - { - ints.add(new Integer(num)); - } catch (NumberFormatException nfe) - { - throw new InvalidGFF3FieldException(attr, set, - "Invalid number in field " + num); - } + mincol = ""; } - // Align positionInRef positionInQuery LengthInRef - // contig_1146 exonerate:protein2genome:local similarity 8534 11269 - // 3652 - . 
alignment_id 0 ; - // Query DDB_G0269124 - // Align 11270 143 120 - // corresponds to : 120 bases align at pos 143 in protein to 11270 on - // dna in strand direction - // Align 11150 187 282 - // corresponds to : 282 bases align at pos 187 in protein to 11150 on - // dna in strand direction - // - // Align 10865 281 888 - // Align 9977 578 1068 - // Align 8909 935 375 - // - if (ints.size() != 3) + else { - throw new InvalidGFF3FieldException(attr, set, - "Invalid number of fields for this attribute (" - + ints.size() + ")"); + gcol.nextToken(); // skip next '|' } - fromrange.add(new Integer(ints.get(0).intValue())); - fromrange.add(new Integer(ints.get(0).intValue() + strand - * ints.get(2).intValue())); - // how are intron/exon boundaries that do not align in codons - // represented - if (ints.get(1).equals(lastppos) && lastpframe > 0) + // continue parsing rest of line + maxcol = gcol.nextToken(); + if (maxcol.equals("|")) { - // extend existing to map - lastppos += ints.get(2) / 3; - lastpframe = ints.get(2) % 3; - torange.set(torange.size() - 1, new Integer(lastppos)); + maxcol = ""; } else { - // new to map range - torange.add(ints.get(1)); - lastppos = ints.get(1) + ints.get(2) / 3; - lastpframe = ints.get(2) % 3; - torange.add(new Integer(lastppos)); + gcol.nextToken(); // skip next '|' + } + abso = gcol.nextToken(); + gcol.nextToken(); // skip next '|' + if (abso.toLowerCase().indexOf("abso") != 0) + { + minval = abso; + abso = null; + } + else + { + minval = gcol.nextToken(); + gcol.nextToken(); // skip next '|' + } + maxval = gcol.nextToken(); + if (gcol.hasMoreTokens()) + { + gcol.nextToken(); // skip next '|' + } + try + { + if (minval.length() > 0) + { + min = Float.valueOf(minval); + } + } catch (Exception e) + { + System.err + .println("Couldn't parse the minimum value for graduated colour for type (" + + colourDescriptor + + ") - did you misspell 'auto' for the optional automatic colour switch ?"); + e.printStackTrace(); + } + try + { + if 
(maxval.length() > 0) + { + max = Float.valueOf(maxval); + } + } catch (Exception e) + { + System.err + .println("Couldn't parse the maximum value for graduated colour for type (" + + colourDescriptor + ")"); + e.printStackTrace(); } } - // from and to ranges must end up being a series of start/end intervals - if (fromrange.size() % 2 == 1) - { - throw new InvalidGFF3FieldException(attr, set, - "Couldn't parse the DNA alignment range correctly"); - } - if (torange.size() % 2 == 1) + else { - throw new InvalidGFF3FieldException(attr, set, - "Couldn't parse the protein alignment range correctly"); + // add in some dummy min/max colours for the label-only + // colourscheme. + mincol = "FFFFFF"; + maxcol = "000000"; } - // finally, build the map - int[] frommap = new int[fromrange.size()], tomap = new int[torange - .size()]; - int p = 0; - for (Integer ip : fromrange) + + GraduatedColor colour = null; + try { - frommap[p++] = ip.intValue(); - } - p = 0; - for (Integer ip : torange) + colour = new GraduatedColor( + new UserColourScheme(mincol).findColour('A'), + new UserColourScheme(maxcol).findColour('A'), min, max); + } catch (Exception e) { - tomap[p++] = ip.intValue(); + System.err.println("Couldn't parse the graduated colour scheme (" + + colourDescriptor + ")"); + e.printStackTrace(); } - - return new MapList(frommap, tomap, 3, 1); - } - - private List findNames(AlignmentI align, - List newseqs, boolean relaxedIdMatching, - List list) - { - List found = new ArrayList(); - for (String seqId : list) + if (colour != null) { - SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs); - if (seq != null) + colour.setColourByLabel(labelCol); + colour.setAutoScaled(abso == null); + // add in any additional parameters + String ttype = null, tval = null; + if (gcol.hasMoreTokens()) { - found.add(seq); + // threshold type and possibly a threshold value + ttype = gcol.nextToken(); + if (ttype.toLowerCase().startsWith("below")) + { + 
colour.setThreshType(AnnotationColourGradient.BELOW_THRESHOLD); + } + else if (ttype.toLowerCase().startsWith("above")) + { + colour.setThreshType(AnnotationColourGradient.ABOVE_THRESHOLD); + } + else + { + colour.setThreshType(AnnotationColourGradient.NO_THRESHOLD); + if (!ttype.toLowerCase().startsWith("no")) + { + System.err.println("Ignoring unrecognised threshold type : " + + ttype); + } + } + } + if (colour.getThreshType() != AnnotationColourGradient.NO_THRESHOLD) + { + try + { + gcol.nextToken(); + tval = gcol.nextToken(); + colour.setThresh(new Float(tval).floatValue()); + } catch (Exception e) + { + System.err.println("Couldn't parse threshold value as a float: (" + + tval + ")"); + e.printStackTrace(); + } + } + // parse the thresh-is-min token ? + if (gcol.hasMoreTokens()) + { + System.err + .println("Ignoring additional tokens in parameters in graduated colour specification\n"); + while (gcol.hasMoreTokens()) + { + System.err.println("|" + gcol.nextToken()); + } + System.err.println("\n"); } } - return found; + return colour; } - private AlignmentI lastmatchedAl = null; - - private SequenceIdMatcher matcher = null; - /** * clear any temporary handles used to speed up ID matching */ - private void resetMatcher() + protected void resetMatcher() { lastmatchedAl = null; matcher = null; } - private SequenceI findName(AlignmentI align, String seqId, + /** + * Returns a sequence matching the given id, as follows + *
    + *
  • matching is on exact sequence name, or on a token within the sequence + * name, or a dbxref, if relaxed matching is selected
  • + *
  • first tries to find a match in the alignment sequences
  • + *
  • else tries to find a match in the new sequences already generated + * parsing the features file
  • + *
  • else creates a new placeholder sequence, adds it to the new sequences + * list, and returns it
  • + *
+ * + * @param align + * @param seqId + * @param relaxedIdMatching + * @param newseqs + * @return + */ + protected SequenceI findName(AlignmentI align, String seqId, boolean relaxedIdMatching, List newseqs) { SequenceI match = null; @@ -1114,8 +622,8 @@ public class FeaturesFile extends AlignFile { if (lastmatchedAl != align) { - matcher = new SequenceIdMatcher( - (lastmatchedAl = align).getSequencesArray()); + lastmatchedAl = align; + matcher = new SequenceIdMatcher(align.getSequencesArray()); if (newseqs != null) { matcher.addAll(newseqs); @@ -1157,7 +665,7 @@ public class FeaturesFile extends AlignFile { return; } - jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks( + ParseHtmlBodyAndLinks parsed = new ParseHtmlBodyAndLinks( sf.getDescription(), removeHTML, newline); sf.description = (removeHTML) ? parsed.getNonHtmlContent() @@ -1172,22 +680,22 @@ public class FeaturesFile extends AlignFile /** * generate a features file for seqs includes non-pos features by default. 
* - * @param seqs + * @param sequences * source of sequence features * @param visible * hash of feature types and colours * @return features file contents */ - public String printJalviewFormat(SequenceI[] seqs, + public String printJalviewFormat(SequenceI[] sequences, Map visible) { - return printJalviewFormat(seqs, visible, true, true); + return printJalviewFormat(sequences, visible, true, true); } /** * generate a features file for seqs with colours from visible (if any) * - * @param seqs + * @param sequences * source of features * @param visible * hash of Colours for each feature type @@ -1198,11 +706,10 @@ public class FeaturesFile extends AlignFile * of group or type) * @return features file contents */ - public String printJalviewFormat(SequenceI[] seqs, Map visible, - boolean visOnly, boolean nonpos) + public String printJalviewFormat(SequenceI[] sequences, + Map visible, boolean visOnly, boolean nonpos) { - StringBuffer out = new StringBuffer(); - SequenceFeature[] next; + StringBuilder out = new StringBuilder(256); boolean featuresGen = false; if (visOnly && !nonpos && (visible == null || visible.size() < 1)) { @@ -1215,15 +722,15 @@ public class FeaturesFile extends AlignFile // write feature colours only if we're given them and we are generating // viewed features // TODO: decide if feature links should also be written here ? - Iterator en = visible.keySet().iterator(); - String type, color; + Iterator en = visible.keySet().iterator(); + String featureType, color; while (en.hasNext()) { - type = en.next().toString(); + featureType = en.next().toString(); - if (visible.get(type) instanceof GraduatedColor) + if (visible.get(featureType) instanceof GraduatedColor) { - GraduatedColor gc = (GraduatedColor) visible.get(type); + GraduatedColor gc = (GraduatedColor) visible.get(featureType); color = (gc.isColourByLabel() ? 
"label|" : "") + Format.getHexString(gc.getMinColor()) + "|" + Format.getHexString(gc.getMaxColor()) @@ -1252,46 +759,47 @@ public class FeaturesFile extends AlignFile color += "none"; } } - else if (visible.get(type) instanceof java.awt.Color) + else if (visible.get(featureType) instanceof Color) { - color = Format.getHexString((java.awt.Color) visible.get(type)); + color = Format.getHexString((Color) visible.get(featureType)); } else { // legacy support for integer objects containing colour triplet values - color = Format.getHexString(new java.awt.Color(Integer - .parseInt(visible.get(type).toString()))); + color = Format.getHexString(new Color(Integer.parseInt(visible + .get(featureType).toString()))); } - out.append(type); - out.append("\t"); + out.append(featureType); + out.append(TAB); out.append(color); out.append(newline); } } // Work out which groups are both present and visible - Vector groups = new Vector(); + List groups = new ArrayList(); int groupIndex = 0; boolean isnonpos = false; - for (int i = 0; i < seqs.length; i++) + SequenceFeature[] features; + for (int i = 0; i < sequences.length; i++) { - next = seqs[i].getSequenceFeatures(); - if (next != null) + features = sequences[i].getSequenceFeatures(); + if (features != null) { - for (int j = 0; j < next.length; j++) + for (int j = 0; j < features.length; j++) { - isnonpos = next[j].begin == 0 && next[j].end == 0; + isnonpos = features[j].begin == 0 && features[j].end == 0; if ((!nonpos && isnonpos) || (!isnonpos && visOnly && !visible - .containsKey(next[j].type))) + .containsKey(features[j].type))) { continue; } - if (next[j].featureGroup != null - && !groups.contains(next[j].featureGroup)) + if (features[j].featureGroup != null + && !groups.contains(features[j].featureGroup)) { - groups.addElement(next[j].featureGroup); + groups.add(features[j].featureGroup); } } } @@ -1300,12 +808,11 @@ public class FeaturesFile extends AlignFile String group = null; do { - if (groups.size() > 0 && groupIndex < 
groups.size()) { - group = groups.elementAt(groupIndex).toString(); + group = groups.get(groupIndex); out.append(newline); - out.append("STARTGROUP\t"); + out.append("STARTGROUP").append(TAB); out.append(group); out.append(newline); } @@ -1314,17 +821,17 @@ public class FeaturesFile extends AlignFile group = null; } - for (int i = 0; i < seqs.length; i++) + for (int i = 0; i < sequences.length; i++) { - next = seqs[i].getSequenceFeatures(); - if (next != null) + features = sequences[i].getSequenceFeatures(); + if (features != null) { - for (int j = 0; j < next.length; j++) + for (int j = 0; j < features.length; j++) { - isnonpos = next[j].begin == 0 && next[j].end == 0; + isnonpos = features[j].begin == 0 && features[j].end == 0; if ((!nonpos && isnonpos) || (!isnonpos && visOnly && !visible - .containsKey(next[j].type))) + .containsKey(features[j].type))) { // skip if feature is nonpos and we ignore them or if we only // output visible and it isn't non-pos and it's not visible @@ -1332,65 +839,65 @@ public class FeaturesFile extends AlignFile } if (group != null - && (next[j].featureGroup == null || !next[j].featureGroup + && (features[j].featureGroup == null || !features[j].featureGroup .equals(group))) { continue; } - if (group == null && next[j].featureGroup != null) + if (group == null && features[j].featureGroup != null) { continue; } // we have features to output featuresGen = true; - if (next[j].description == null - || next[j].description.equals("")) + if (features[j].description == null + || features[j].description.equals("")) { - out.append(next[j].type + "\t"); + out.append(features[j].type).append(TAB); } else { - if (next[j].links != null - && next[j].getDescription().indexOf("") == -1) + if (features[j].links != null + && features[j].getDescription().indexOf("") == -1) { out.append(""); } - out.append(next[j].description + " "); - if (next[j].links != null) + out.append(features[j].description + " "); + if (features[j].links != null) { - for (int l = 
0; l < next[j].links.size(); l++) + for (int l = 0; l < features[j].links.size(); l++) { - String label = next[j].links.elementAt(l).toString(); + String label = features[j].links.elementAt(l).toString(); String href = label.substring(label.indexOf("|") + 1); label = label.substring(0, label.indexOf("|")); - if (next[j].description.indexOf(href) == -1) + if (features[j].description.indexOf(href) == -1) { out.append("" + label + ""); } } - if (next[j].getDescription().indexOf("") == -1) + if (features[j].getDescription().indexOf("") == -1) { out.append(""); } } - out.append("\t"); + out.append(TAB); } - out.append(seqs[i].getName()); + out.append(sequences[i].getName()); out.append("\t-1\t"); - out.append(next[j].begin); - out.append("\t"); - out.append(next[j].end); - out.append("\t"); - out.append(next[j].type); - if (!Float.isNaN(next[j].score)) + out.append(features[j].begin); + out.append(TAB); + out.append(features[j].end); + out.append(TAB); + out.append(features[j].type); + if (!Float.isNaN(features[j].score)) { - out.append("\t"); - out.append(next[j].score); + out.append(TAB); + out.append(features[j].score); } out.append(newline); } @@ -1399,7 +906,7 @@ public class FeaturesFile extends AlignFile if (group != null) { - out.append("ENDGROUP\t"); + out.append("ENDGROUP").append(TAB); out.append(group); out.append(newline); groupIndex++; @@ -1420,112 +927,594 @@ public class FeaturesFile extends AlignFile } /** - * generate a gff file for sequence features includes non-pos features by - * default. + * Parse method that is called when a GFF file is dragged to the desktop + */ + @Override + public void parse() + { + AlignViewportI av = getViewport(); + if (av != null) + { + if (av.getAlignment() != null) + { + dataset = av.getAlignment().getDataset(); + } + if (dataset == null) + { + // working in the applet context ? 
+ dataset = av.getAlignment(); + } + } + else + { + dataset = new Alignment(new SequenceI[] {}); + } + + boolean parseResult = parse(dataset, null, false, true); + if (!parseResult) + { + // pass error up somehow + } + if (av != null) + { + // update viewport with the dataset data ? + } + else + { + setSeqs(dataset.getSequencesArray()); + } + } + + /** + * Implementation of unused abstract method + * + * @return error message + */ + @Override + public String print() + { + return "Use printGffFormat() or printJalviewFormat()"; + } + + /** + * Returns features output in GFF2 format, including hidden and non-positional + * features * - * @param seqs + * @param sequences + * the sequences whose features are to be output * @param visible + * a map whose keys are the type names of visible features * @return */ - public String printGFFFormat(SequenceI[] seqs, Map visible) + public String printGffFormat(SequenceI[] sequences, Map visible) { - return printGFFFormat(seqs, visible, true, true); + return printGffFormat(sequences, visible, true, true); } - public String printGFFFormat(SequenceI[] seqs, - Map visible, boolean visOnly, boolean nonpos) + /** + * Returns features output in GFF2 format + * + * @param sequences + * the sequences whose features are to be output + * @param visible + * a map whose keys are the type names of visible features + * @param outputVisibleOnly + * @param includeNonPositionalFeatures + * @return + */ + public String printGffFormat(SequenceI[] sequences, Map visible, boolean outputVisibleOnly, + boolean includeNonPositionalFeatures) { - StringBuffer out = new StringBuffer(); - SequenceFeature[] next; + StringBuilder out = new StringBuilder(256); + out.append(String.format("%s %d\n", GFF_VERSION, gffVersion)); String source; boolean isnonpos; - for (int i = 0; i < seqs.length; i++) + for (SequenceI seq : sequences) { - if (seqs[i].getSequenceFeatures() != null) + SequenceFeature[] features = seq.getSequenceFeatures(); + if (features != null) { - 
next = seqs[i].getSequenceFeatures(); - for (int j = 0; j < next.length; j++) + for (SequenceFeature sf : features) { - isnonpos = next[j].begin == 0 && next[j].end == 0; - if ((!nonpos && isnonpos) - || (!isnonpos && visOnly && !visible - .containsKey(next[j].type))) + isnonpos = sf.begin == 0 && sf.end == 0; + if (!includeNonPositionalFeatures && isnonpos) { + /* + * ignore non-positional features if not wanted + */ continue; } - - source = next[j].featureGroup; - if (source == null) + // TODO why the test !isnonpos here? + // what about not visible non-positional features? + if (!isnonpos && outputVisibleOnly + && !visible.containsKey(sf.type)) { - source = next[j].getDescription(); + /* + * ignore not visible features if not wanted + */ + continue; } - - out.append(seqs[i].getName()); - out.append("\t"); - out.append(source); - out.append("\t"); - out.append(next[j].type); - out.append("\t"); - out.append(next[j].begin); - out.append("\t"); - out.append(next[j].end); - out.append("\t"); - out.append(next[j].score); - out.append("\t"); - - if (next[j].getValue("STRAND") != null) + + source = sf.featureGroup; + if (source == null) { - out.append(next[j].getValue("STRAND")); - out.append("\t"); + source = sf.getDescription(); } - else + + out.append(seq.getName()); + out.append(TAB); + out.append(source); + out.append(TAB); + out.append(sf.type); + out.append(TAB); + out.append(sf.begin); + out.append(TAB); + out.append(sf.end); + out.append(TAB); + out.append(sf.score); + out.append(TAB); + + out.append(sf.getValue(STRAND, ".")); + out.append(TAB); + + out.append(sf.getValue(FRAME, ".")); + + // miscellaneous key-values (GFF column 9) + String attributes = (String) sf.getValue(ATTRIBUTES); + if (attributes != null) { - out.append(".\t"); + out.append(TAB).append(attributes); } + + out.append(newline); + } + } + } + + return out.toString(); + } - if (next[j].getValue("FRAME") != null) - { - out.append(next[j].getValue("FRAME")); - } - else - { - out.append("."); - 
} - // TODO: verify/check GFF - should there be a /t here before attribute - // output ? + /** + * Helper method to make a mapping given a set of attributes for a GFF feature + * + * @param set + * @param attr + * @param strand + * either 1 (forward) or -1 (reverse) + * @return + * @throws InvalidGFF3FieldException + */ + protected MapList constructCodonMappingFromAlign( + Map> set, String attr, + int strand) throws InvalidGFF3FieldException + { + if (strand == 0) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid strand for a codon mapping (cannot be 0)"); + } + List fromrange = new ArrayList(); + List torange = new ArrayList(); + int lastppos = 0, lastpframe = 0; + for (String range : set.get(attr)) + { + List ints = new ArrayList(); + StringTokenizer st = new StringTokenizer(range, " "); + while (st.hasMoreTokens()) + { + String num = st.nextToken(); + try + { + ints.add(new Integer(num)); + } catch (NumberFormatException nfe) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid number in field " + num); + } + } + /* + * Align positionInRef positionInQuery LengthInRef + * contig_1146 exonerate:p2g:local similarity 8534 11269 3652 - . 
+ * alignment_id 0 ; Query DDB_G0269124 Align 11270 143 120 + * means: + * 120 bases align at pos 143 in protein to 11270 on dna (-ve strand) + * and so on for additional ' ; Align x y z' groups + */ + if (ints.size() != 3) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid number of fields for this attribute (" + + ints.size() + ")"); + } + fromrange.add(ints.get(0)); + fromrange.add(ints.get(0) + strand * ints.get(2)); + // how are intron/exon boundaries that do not align in codons + // represented + if (ints.get(1).intValue() == lastppos && lastpframe > 0) + { + // extend existing to map + lastppos += ints.get(2) / 3; + lastpframe = ints.get(2) % 3; + torange.set(torange.size() - 1, new Integer(lastppos)); + } + else + { + // new to map range + torange.add(ints.get(1)); + lastppos = ints.get(1) + ints.get(2) / 3; + lastpframe = ints.get(2) % 3; + torange.add(new Integer(lastppos)); + } + } + // from and to ranges must end up being a series of start/end intervals + if (fromrange.size() % 2 == 1) + { + throw new InvalidGFF3FieldException(attr, set, + "Couldn't parse the DNA alignment range correctly"); + } + if (torange.size() % 2 == 1) + { + throw new InvalidGFF3FieldException(attr, set, + "Couldn't parse the protein alignment range correctly"); + } + // finally, build the map + int[] frommap = new int[fromrange.size()], tomap = new int[torange + .size()]; + int p = 0; + for (Integer ip : fromrange) + { + frommap[p++] = ip.intValue(); + } + p = 0; + for (Integer ip : torange) + { + tomap[p++] = ip.intValue(); + } + + return new MapList(frommap, tomap, 3, 1); + } - if (next[j].getValue("ATTRIBUTES") != null) - { - out.append(next[j].getValue("ATTRIBUTES")); - } + private List findNames(AlignmentI align, List newseqs, boolean relaxedIdMatching, + List list) + { + List found = new ArrayList(); + for (String seqId : list) + { + SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs); + if (seq != null) + { + found.add(seq); + } + } + return 
found; + } - out.append(newline); + /** + * Parse a GFF format feature. This may include creating a 'dummy' sequence + * for the feature or its mapped sequence + * + * @param st + * @param alignment + * @param relaxedIdmatching + * @param newseqs + * @return + */ + protected SequenceI parseGffFeature(StringTokenizer st, AlignmentI alignment, boolean relaxedIdmatching, + List newseqs) + { + SequenceI seq; + /* + * GFF: seqid source type start end score strand phase [attributes] + */ + String seqId = st.nextToken(); + + /* + * locate referenced sequence in alignment _or_ + * as a forward reference (SequenceDummy) + */ + seq = findName(alignment, seqId, relaxedIdmatching, newseqs); + + String desc = st.nextToken(); + String group = null; + if (desc.indexOf(' ') == -1) + { + // could also be a source term rather than description line + group = desc; + } + String ft = st.nextToken(); + int startPos = StringUtils.parseInt(st.nextToken()); + int endPos = StringUtils.parseInt(st.nextToken()); + // TODO: decide if non positional feature assertion for input data + // where end==0 is generally valid + if (endPos == 0) + { + // treat as non-positional feature, regardless. 
+ startPos = 0; + } + float score = 0f; + try + { + score = new Float(st.nextToken()).floatValue(); + } catch (NumberFormatException ex) + { + // leave at 0 + } + + SequenceFeature sf = new SequenceFeature(ft, desc, startPos, + endPos, score, group); + if (st.hasMoreTokens()) + { + sf.setValue(STRAND, st.nextToken()); + } + if (st.hasMoreTokens()) + { + sf.setValue(FRAME, st.nextToken()); + } + + if (st.hasMoreTokens()) + { + String attributes = st.nextToken(); + sf.setValue(ATTRIBUTES, attributes); + + /* + * parse semi-structured attributes in column 9 and add them to the + * sequence feature's 'otherData' table; use Note as a best proxy for + * description + */ + Map> nameValues = StringUtils.parseNameValuePairs(attributes, ";", + new char[] { ' ', '=' }); + for (Entry> attr : nameValues.entrySet()) + { + String values = StringUtils.listToDelimitedString(attr.getValue(), + "; "); + sf.setValue(attr.getKey(), values); + if ("Note".equals(attr.getKey())) + { + sf.setDescription(values); + } + } + } + + if (processOrAddSeqFeature(alignment, newseqs, seq, sf, + relaxedIdmatching)) + { + // check whether we should add the sequence feature to any other + // sequences in the alignment with the same or similar + while ((seq = alignment.findName(seq, seqId, true)) != null) + { + seq.addSequenceFeature(new SequenceFeature(sf)); + } + } + return seq; + } + /** + * After encountering ##fasta in a GFF3 file, process the remainder of the + * file as FAST sequence data. Any placeholder sequences created during + * feature parsing are updated with the actual sequences. 
+ * + * @param align + * @param newseqs + * @throws IOException + */ + protected void processAsFasta(AlignmentI align, List newseqs) + throws IOException + { + try + { + mark(); + } catch (IOException q) + { + } + FastaFile parser = new FastaFile(this); + List includedseqs = parser.getSeqs(); + SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs); + // iterate over includedseqs, and replacing matching ones with newseqs + // sequences. Generic iterator not used here because we modify includedseqs + // as we go + for (int p = 0, pSize = includedseqs.size(); p < pSize; p++) + { + // search for any dummy seqs that this sequence can be used to update + SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p)); + if (dummyseq != null) + { + // dummyseq was created so it could be annotated and referred to in + // alignments/codon mappings + + SequenceI mseq = includedseqs.get(p); + // mseq is the 'template' imported from the FASTA file which we'll use + // to coomplete dummyseq + if (dummyseq instanceof SequenceDummy) + { + // probably have the pattern wrong + // idea is that a flyweight proxy for a sequence ID can be created for + // 1. stable reference creation + // 2. addition of annotation + // 3. future replacement by a real sequence + // current pattern is to create SequenceDummy objects - a convenience + // constructor for a Sequence. + // problem is that when promoted to a real sequence, all references + // need + // to be updated somehow. 
+ ((SequenceDummy) dummyseq).become(mseq); + includedseqs.set(p, dummyseq); // template is no longer needed } } } + // finally add sequences to the dataset + for (SequenceI seq : includedseqs) + { + align.addSequence(seq); + } + } - return out.toString(); + /** + * Process a ## directive + * + * @param line + * @param gffProps + * @param align + * @param newseqs + * @throws IOException + */ + protected void processGffPragma(String line, Map gffProps, AlignmentI align, + List newseqs) throws IOException + { + line = line.trim(); + if ("###".equals(line)) + { + // close off any open 'forward references' + return; + } + + String[] tokens = line.substring(2).split(" "); + String pragma = tokens[0]; + String value = tokens.length == 1 ? null : tokens[1]; + + if ("gff-version".equalsIgnoreCase(pragma)) + { + if (value != null) + { + try + { + // value may be e.g. "3.1.2" + gffVersion = Integer.parseInt(value.split("\\.")[0]); + } catch (NumberFormatException e) + { + // ignore + } + } + } + else if ("feature-ontology".equalsIgnoreCase(pragma)) + { + // should resolve against the specified feature ontology URI + } + else if ("attribute-ontology".equalsIgnoreCase(pragma)) + { + // URI of attribute ontology - not currently used in GFF3 + } + else if ("source-ontology".equalsIgnoreCase(pragma)) + { + // URI of source ontology - not currently used in GFF3 + } + else if ("species-build".equalsIgnoreCase(pragma)) + { + // save URI of specific NCBI taxon version of annotations + gffProps.put("species-build", value); + } + else if ("fasta".equalsIgnoreCase(pragma)) + { + // process the rest of the file as a fasta file and replace any dummy + // sequence IDs + processAsFasta(align, newseqs); + } + else + { + System.err.println("Ignoring unknown pragma: " + line); + } } /** - * this is only for the benefit of object polymorphism - method does nothing. 
+ * Processes the 'Query' and 'Align' properties associated with a GFF + * similarity feature; these properties define the mapping of the annotated + * feature to another from which it has transferred annotation + * + * @param set + * @param seq + * @param sf + * @return */ - @Override - public void parse() + public void processGffSimilarity(Map> set, SequenceI seq, + SequenceFeature sf, AlignmentI align, List newseqs, boolean relaxedIdMatching) + throws InvalidGFF3FieldException { - // IGNORED + int strand = sf.getStrand(); + // exonerate cdna/protein map + // look for fields + List querySeq = findNames(align, newseqs, relaxedIdMatching, + set.get("Query")); + if (querySeq == null || querySeq.size() != 1) + { + throw new InvalidGFF3FieldException("Query", set, + "Expecting exactly one sequence in Query field (got " + + set.get("Query") + ")"); + } + if (set.containsKey("Align")) + { + // process the align maps and create cdna/protein maps + // ideally, the query sequences are in the alignment, but maybe not... + + AlignedCodonFrame alco = new AlignedCodonFrame(); + MapList codonmapping = constructCodonMappingFromAlign(set, "Align", + strand); + + // add codon mapping, and hope! + alco.addMap(seq, querySeq.get(0), codonmapping); + align.addCodonFrame(alco); + } + } /** - * this is only for the benefit of object polymorphism - method does nothing. 
+ * take a sequence feature and examine its attributes to decide how it should + * be added to a sequence * - * @return error message + * @param seq + * - the destination sequence constructed or discovered in the + * current context + * @param sf + * - the base feature with ATTRIBUTES property containing any + * additional attributes + * @param gFFFile + * - true if we are processing a GFF annotation file + * @return true if sf was actually added to the sequence, false if it was + * processed in another way */ - @Override - public String print() + public boolean processOrAddSeqFeature(AlignmentI align, List newseqs, + SequenceI seq, SequenceFeature sf, boolean relaxedIdMatching) { - return "USE printGFFFormat() or printJalviewFormat()"; + String attr = (String) sf.getValue(ATTRIBUTES); + boolean addFeature = true; + if (attr != null) + { + for (String attset : attr.split(TAB)) + { + Map> set = StringUtils.parseNameValuePairs( + attset, ";", new char[] { ' ', '-' }); + + if ("similarity".equals(sf.getType())) + { + try + { + processGffSimilarity(set, seq, sf, align, newseqs, + relaxedIdMatching); + addFeature = false; + } catch (InvalidGFF3FieldException ivfe) + { + System.err.println(ivfe); + } + } + } + } + if (addFeature) + { + seq.addSequenceFeature(sf); + } + return addFeature; } } + +class InvalidGFF3FieldException extends Exception +{ + String field, value; + + public InvalidGFF3FieldException(String field, + Map> set, String message) + { + super(message + " (Field was " + field + " and value was " + + set.get(field).toString()); + this.field = field; + this.value = set.get(field).toString(); + } +} diff --git a/src/jalview/io/FileLoader.java b/src/jalview/io/FileLoader.java index 11c40c3..eae7a6b 100755 --- a/src/jalview/io/FileLoader.java +++ b/src/jalview/io/FileLoader.java @@ -250,14 +250,14 @@ public class FileLoader implements Runnable // just in case the caller didn't identify the file for us if (source != null) { - format = new 
IdentifyFile().Identify(source, false); // identify + format = new IdentifyFile().identify(source, false); // identify // stream and // rewind rather // than close } else { - format = new IdentifyFile().Identify(file, protocol); + format = new IdentifyFile().identify(file, protocol); } } diff --git a/src/jalview/io/FileParse.java b/src/jalview/io/FileParse.java index 405363d..8711354 100755 --- a/src/jalview/io/FileParse.java +++ b/src/jalview/io/FileParse.java @@ -457,11 +457,19 @@ public class FileParse } /** - * rewinds the datasource the beginning. + * Rewinds the datasource to the marked point if possible + * + * @param bytesRead * */ - public void reset() throws IOException + public void reset(int bytesRead) throws IOException { + if (bytesRead >= READAHEAD_LIMIT) + { + System.err.println(String.format( + "File reset error: read %d bytes but reset limit is %d", + bytesRead, READAHEAD_LIMIT)); + } if (dataIn != null && !error) { dataIn.reset(); diff --git a/src/jalview/io/Gff3File.java b/src/jalview/io/Gff3File.java deleted file mode 100644 index 248fa09..0000000 --- a/src/jalview/io/Gff3File.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. 
- */ -package jalview.io; - -import jalview.api.AlignViewportI; -import jalview.datamodel.AlignedCodonFrame; -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.SequenceI; - -import java.io.IOException; -import java.util.List; - -/** - * A GFF3 File parsing wrapper for the tangled mess that is FeaturesFile. - * - * This class implements the methods relied on by FileLoader/FormatAdapter in - * order to allow them to load alignments directly from GFF2 and GFF3 files that - * contain sequence data and alignment information. - * - * Major issues: - * - * 1. GFF3 files commonly include mappings between DNA, RNA and Protein - so - * this class needs a dataset AlignmentI context to create alignment codon - * mappings. - * - * 2. A single GFF3 file can generate many distinct alignments. Support will be - * needed to allow several AlignmentI instances to be generated from a single - * file. - * - * - * @author jprocter - * - */ -public class Gff3File extends FeaturesFile -{ - - /** - * - */ - public Gff3File() - { - super(); - } - - /** - * @param source - * @throws IOException - */ - public Gff3File(FileParse source) throws IOException - { - super(source); - } - - /** - * @param inFile - * @param type - * @throws IOException - */ - public Gff3File(String inFile, String type) throws IOException - { - super(inFile, type); - } - - /** - * @param parseImmediately - * @param source - * @throws IOException - */ - public Gff3File(boolean parseImmediately, FileParse source) - throws IOException - { - super(parseImmediately, source); - } - - /** - * @param parseImmediately - * @param inFile - * @param type - * @throws IOException - */ - public Gff3File(boolean parseImmediately, String inFile, String type) - throws IOException - { - super(parseImmediately, inFile, type); - } - - /* - * (non-Javadoc) - * - * @see jalview.io.FeaturesFile#print() - */ - @Override - public String print() - { - // TODO GFF3 writer with sensible defaults 
for writing alignment data - - // return super.printGFFFormat(seqs, visible); - return ("Not yet implemented."); - } - - AlignmentI dataset; - - List alignments; - - @Override - public void parse() - { - AlignViewportI av = getViewport(); - if (av != null) - { - if (av.getAlignment() != null) - { - dataset = av.getAlignment().getDataset(); - } - if (dataset == null) - { - // working in the applet context ? - dataset = av.getAlignment(); - } - } - else - { - dataset = new Alignment(new SequenceI[] {}); - } - - boolean parseResult = parse(dataset, null, null, false, true); - if (!parseResult) - { - // pass error up somehow - } - if (av != null) - { - // update viewport with the dataset data ? - } - else - { - setSeqs(dataset.getSequencesArray()); - } - - } - - @Override - public void addProperties(AlignmentI al) - { - super.addProperties(al); - if (dataset.getCodonFrames() != null) - { - AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset(); - for (AlignedCodonFrame codons : dataset.getCodonFrames()) - { - ds.addCodonFrame(codons); - } - } - } -} diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index aec0540..40e9390 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -30,7 +30,7 @@ import java.io.IOException; */ public class IdentifyFile { - public static final String GFF3File = "GFF v2 or v3"; + public static final String FeaturesFile = "GFF or Jalview features"; /** * Identify a datasource's file content. @@ -44,7 +44,7 @@ public class IdentifyFile * DOCUMENT ME! 
* @return ID String */ - public String Identify(String file, String protocol) + public String identify(String file, String protocol) { String emessage = "UNIDENTIFIED FILE PARSING ERROR"; FileParse parser = null; @@ -53,7 +53,7 @@ public class IdentifyFile parser = new FileParse(file, protocol); if (parser.isValid()) { - return Identify(parser); + return identify(parser); } } catch (Exception e) { @@ -68,9 +68,9 @@ public class IdentifyFile return emessage; } - public String Identify(FileParse source) + public String identify(FileParse source) { - return Identify(source, true); // preserves original behaviour prior to + return identify(source, true); // preserves original behaviour prior to // version 2.3 } @@ -82,11 +82,12 @@ public class IdentifyFile * @param closeSource * @return filetype string */ - public String Identify(FileParse source, boolean closeSource) + public String identify(FileParse source, boolean closeSource) { String reply = "PFAM"; String data; - int length = 0; + int bytesRead = 0; + int trimmedLength = 0; boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered @@ -98,7 +99,8 @@ public class IdentifyFile } while ((data = source.nextLine()) != null) { - length += data.trim().length(); + bytesRead += data.length(); + trimmedLength += data.trim().length(); if (!lineswereskipped) { for (int i = 0; !isBinary && i < data.length(); i++) @@ -134,7 +136,13 @@ public class IdentifyFile if (data.startsWith("##GFF-VERSION")) { - reply = GFF3File; + // GFF - possibly embedded in a Jalview features file! 
+ reply = FeaturesFile; + break; + } + if (looksLikeFeatureData(data)) + { + reply = FeaturesFile; break; } if (data.indexOf("# STOCKHOLM") > -1) @@ -238,6 +246,7 @@ public class IdentifyFile if ((data.indexOf("<") > -1)) // possible Markup Language data i.e HTML, // RNAML, XML { + // FIXME this is nuts - it consumes the rest of the file if no match boolean identified = false; do { @@ -309,23 +318,13 @@ public class IdentifyFile reply = PhylipFile.FILE_DESC; break; } - - /* - * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else - * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = - * "SimpleBLAST"; break; - * - * } // end comments for Jalview 2.4.1 - */ - else if (!lineswereskipped && data.charAt(0) != '*' - && data.charAt(0) != ' ' - && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(",") -1 + && commaPos > -1 && colonPos < commaPos; + // && data.indexOf(",") getFeatureColours() { return featureColours; } @@ -64,7 +66,7 @@ public class JalviewDataset * @param featureColours * the featureColours to set */ - public void setFeatureColours(Hashtable featureColours) + public void setFeatureColours(Map featureColours) { this.featureColours = featureColours; } @@ -185,7 +187,7 @@ public class JalviewDataset /** * current set of feature colours */ - Hashtable featureColours; + Map featureColours; /** * original identity of each sequence in results @@ -199,7 +201,7 @@ public class JalviewDataset seqDetails = new Hashtable(); al = new ArrayList(); parentDataset = null; - featureColours = new Hashtable(); + featureColours = new HashMap(); } /** @@ -207,9 +209,10 @@ public class JalviewDataset * * @param parentAlignment */ - public JalviewDataset(AlignmentI aldataset, Hashtable fc, + public JalviewDataset(AlignmentI aldataset, Map fc, Hashtable seqDets) { + // TODO not used - remove? 
this(aldataset, fc, seqDets, null); } @@ -228,7 +231,7 @@ public class JalviewDataset * (may be null) alignment to associate new annotation and trees * with. */ - public JalviewDataset(AlignmentI aldataset, Hashtable fc, + public JalviewDataset(AlignmentI aldataset, Map fc, Hashtable seqDets, AlignmentI parentAlignment) { this(); diff --git a/src/jalview/io/packed/ParsePackedSet.java b/src/jalview/io/packed/ParsePackedSet.java index a4ef77e..01369b9 100644 --- a/src/jalview/io/packed/ParsePackedSet.java +++ b/src/jalview/io/packed/ParsePackedSet.java @@ -30,7 +30,7 @@ import jalview.io.packed.DataProvider.JvDataType; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; -import java.util.Hashtable; +import java.util.HashMap; import java.util.List; public class ParsePackedSet @@ -66,7 +66,7 @@ public class ParsePackedSet String fmt = null; try { - fmt = new IdentifyFile().Identify(src, false); + fmt = new IdentifyFile().identify(src, false); } catch (Exception ex) { exerror = ex; @@ -157,7 +157,7 @@ public class ParsePackedSet // if not, create one. 
if (context.featureColours == null) { - context.featureColours = new Hashtable(); + context.featureColours = new HashMap(); } try { diff --git a/src/jalview/io/vamsas/Sequencefeature.java b/src/jalview/io/vamsas/Sequencefeature.java index 6e93f45..61491b2 100644 --- a/src/jalview/io/vamsas/Sequencefeature.java +++ b/src/jalview/io/vamsas/Sequencefeature.java @@ -27,6 +27,7 @@ import jalview.io.VamsasAppDatastore; import jalview.util.UrlLink; import java.util.Enumeration; +import java.util.Iterator; import java.util.Vector; import uk.ac.vamsas.objects.core.DataSetAnnotations; @@ -71,6 +72,7 @@ public class Sequencefeature extends Rangetype doJvUpdate(); } + @Override public void addToDocument() { DataSetAnnotations dsa = (DataSetAnnotations) vobj; @@ -89,6 +91,7 @@ public class Sequencefeature extends Rangetype dataset.addDataSetAnnotations(dsa); } + @Override public void addFromDocument() { DataSetAnnotations dsa = (DataSetAnnotations) vobj; @@ -106,6 +109,7 @@ public class Sequencefeature extends Rangetype bindjvvobj(sf, dsa); } + @Override public void conflict() { log.warn("Untested sequencefeature conflict code"); @@ -118,6 +122,7 @@ public class Sequencefeature extends Rangetype addToDocument(); // and create a new feature in the document } + @Override public void updateToDoc() { DataSetAnnotations dsa = (DataSetAnnotations) vobj; @@ -144,6 +149,7 @@ public class Sequencefeature extends Rangetype } + @Override public void updateFromDoc() { DataSetAnnotations dsa = (DataSetAnnotations) vobj; @@ -229,11 +235,11 @@ public class Sequencefeature extends Rangetype } if (feature.otherDetails != null) { - Enumeration iter = feature.otherDetails.keys(); + Iterator iter = feature.otherDetails.keySet().iterator(); Vector props = dsa.getPropertyAsReference(); - while (iter.hasMoreElements()) + while (iter.hasNext()) { - String key = (String) iter.nextElement(); + String key = iter.next(); if (!key.equalsIgnoreCase("score") && !key.equalsIgnoreCase("status")) { diff --git 
a/src/jalview/schemes/UserColourScheme.java b/src/jalview/schemes/UserColourScheme.java index 7aff05a..b1e4d58 100755 --- a/src/jalview/schemes/UserColourScheme.java +++ b/src/jalview/schemes/UserColourScheme.java @@ -67,7 +67,7 @@ public class UserColourScheme extends ResidueColourScheme if (col == null) { - System.out.println("Unknown colour!! " + colour); + System.out.println("Making colour from name: " + colour); col = createColourFromName(colour); } diff --git a/src/jalview/util/StringUtils.java b/src/jalview/util/StringUtils.java index 533e98b..ad1c0f7 100644 --- a/src/jalview/util/StringUtils.java +++ b/src/jalview/util/StringUtils.java @@ -21,7 +21,9 @@ package jalview.util; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.regex.Pattern; public class StringUtils @@ -248,4 +250,121 @@ public class StringUtils } return "" + separator; } + + /** + * Parses the input line to a map of name / value(s) pairs. For example the + * line
+ * Notes=Fe-S;Method=manual curation; source = Pfam; Notes = Metal
+ * if parsed with delimiter=";" and separators {' ', '='}
+ * would return a map with { Notes={Fe-S, Metal}, Method={manual curation}, + * source={Pfam}}
+ * Note the name/value strings are trimmed of leading / trailing spaces; the + * first separator encountered is used + * + * @param line + * @param delimiter + * the major delimiter between name-value pairs + * @param separators + * one or more separators used between name and value + * @return the name-values map (which may be empty but never null) + */ + public static Map> parseNameValuePairs(String line, + String delimiter, char[] separators) + { + Map> map = new HashMap>(); + if (line == null || line.trim().length() == 0) + { + return map; + } + + for (String pair : line.trim().split(delimiter)) + { + pair = pair.trim(); + if (pair.length() == 0) + { + continue; + } + + int sepPos = -1; + for (char sep : separators) + { + int pos = pair.indexOf(sep); + if (pos > -1 && (sepPos == -1 || pos < sepPos)) + { + sepPos = pos; + } + } + + if (sepPos == -1) + { + // no name=value detected + continue; + } + + String key = pair.substring(0, sepPos).trim(); + String value = pair.substring(sepPos + 1).trim(); + if (value.length() > 0) + { + List vals = map.get(key); + if (vals == null) + { + vals = new ArrayList(); + map.put(key, vals); + } + vals.add(value); + } + } + return map; + } + + /** + * Converts a list to a string with a delimiter before each term except the + * first. Returns an empty string given a null or zero-length argument. This + * can be replaced with StringJoiner in Java 8. 
+ * + * @param terms + * @param delim + * @return + */ + public static String listToDelimitedString(List terms, + String delim) + { + StringBuilder sb = new StringBuilder(32); + if (terms != null && !terms.isEmpty()) + { + boolean appended = false; + for (String term : terms) + { + if (appended) + { + sb.append(delim); + } + appended = true; + sb.append(term); + } + } + return sb.toString(); + } + + /** + * Convenience method to parse a string to an integer, returning 0 if the + * input is null or not a valid integer + * + * @param s + * @return + */ + public static int parseInt(String s) + { + int result = 0; + if (s != null && s.length() > 0) + { + try + { + result = Integer.parseInt(s); + } catch (NumberFormatException ex) + { + } + } + return result; + } } diff --git a/src/jalview/ws/jws1/JPredThread.java b/src/jalview/ws/jws1/JPredThread.java index 8c7768d..8299e3c 100644 --- a/src/jalview/ws/jws1/JPredThread.java +++ b/src/jalview/ws/jws1/JPredThread.java @@ -123,7 +123,7 @@ class JPredThread extends JWS1Thread implements WSClientI jalview.bin.Cache.log.debug("Getting associated alignment."); // we ignore the returned alignment if we only predicted on a single // sequence - String format = new jalview.io.IdentifyFile().Identify( + String format = new jalview.io.IdentifyFile().identify( result.getAligfile(), "Paste"); if (jalview.io.FormatAdapter.isValidFormat(format)) diff --git a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java index 0785dfa..85a729d 100644 --- a/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java +++ b/src/jalview/ws/seqfetcher/DbSourceProxyImpl.java @@ -121,7 +121,7 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy protected AlignmentI parseResult(String result) throws Exception { AlignmentI sequences = null; - String format = new IdentifyFile().Identify(result, "Paste"); + String format = new IdentifyFile().identify(result, "Paste"); if (FormatAdapter.isValidFormat(format)) 
{ sequences = new FormatAdapter().readFile(result.toString(), "Paste", diff --git a/test/jalview/datamodel/SequenceFeatureTest.java b/test/jalview/datamodel/SequenceFeatureTest.java new file mode 100644 index 0000000..7debb0b --- /dev/null +++ b/test/jalview/datamodel/SequenceFeatureTest.java @@ -0,0 +1,48 @@ +package jalview.datamodel; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertSame; + +import org.testng.annotations.Test; + +public class SequenceFeatureTest +{ + @Test(groups = { "Functional" }) + public void testCopyConstructor() + { + SequenceFeature sf1 = new SequenceFeature("type", "desc", 22, 33, + 12.5f, "group"); + sf1.setValue("STRAND", "+"); + sf1.setValue("Note", "Testing"); + Integer count = new Integer(7); + sf1.setValue("Count", count); + + SequenceFeature sf2 = new SequenceFeature(sf1); + assertEquals("type", sf2.getType()); + assertEquals("desc", sf2.getDescription()); + assertEquals(22, sf2.getBegin()); + assertEquals(33, sf2.getEnd()); + assertEquals("+", sf2.getValue("STRAND")); + assertEquals("Testing", sf2.getValue("Note")); + // shallow clone of otherDetails map - contains the same object values! 
+ assertSame(count, sf2.getValue("Count")); + } + + /** + * Tests for retrieving a 'miscellaneous details' property value, with or + * without a supplied default + */ + @Test(groups = { "Functional" }) + public void testGetValue() + { + SequenceFeature sf1 = new SequenceFeature("type", "desc", 22, 33, + 12.5f, "group"); + sf1.setValue("STRAND", "+"); + assertEquals("+", sf1.getValue("STRAND")); + assertNull(sf1.getValue("strand")); // case-sensitive + assertEquals(".", sf1.getValue("unknown", ".")); + Integer i = new Integer(27); + assertSame(i, sf1.getValue("Unknown", i)); + } +} diff --git a/test/jalview/io/AnnotationFileIOTest.java b/test/jalview/io/AnnotationFileIOTest.java index d757a6a..625244d 100644 --- a/test/jalview/io/AnnotationFileIOTest.java +++ b/test/jalview/io/AnnotationFileIOTest.java @@ -70,7 +70,7 @@ public class AnnotationFileIOTest FormatAdapter rf = new FormatAdapter(); AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE, - new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE)); + new IdentifyFile().identify(ff, AppletFormatAdapter.FILE)); // make sure dataset is initialised ? 
not sure about this for (int i = 0; i < al.getSequencesArray().length; ++i) diff --git a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java index 520d1bb..1592392 100644 --- a/test/jalview/io/FeaturesFileTest.java +++ b/test/jalview/io/FeaturesFileTest.java @@ -21,12 +21,15 @@ package jalview.io; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceFeature; import jalview.gui.AlignFrame; +import jalview.schemes.AnnotationColourGradient; +import jalview.schemes.GraduatedColor; import java.awt.Color; import java.io.File; @@ -38,41 +41,9 @@ import org.testng.annotations.Test; public class FeaturesFileTest { - static String TestFiles[][] = { { "Test example features import/export", - "examples/uniref50.fa", "examples/exampleFeatures.txt" } }; - @Test(groups = { "Functional" }) public void testParse() throws Exception { - testFeaturesFileIO("Features file test"); - } - - public static AlignmentI readAlignmentFile(File f) throws IOException - { - System.out.println("Reading file: " + f); - String ff = f.getPath(); - FormatAdapter rf = new FormatAdapter(); - - AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE, - new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE)); - - al.setDataset(null); // creates dataset sequences - assertNotNull("Couldn't read supplied alignment data.", al); - return al; - } - - /** - * Helper method for testing - * - * @param testname - * @param f - * alignment file - * @param featFile - * features file to load on to the alignment - * @throws IOException - */ - public static void testFeaturesFileIO(String testname) throws IOException - { File f = new File("examples/uniref50.fa"); AlignmentI al = readAlignmentFile(f); AlignFrame af = new AlignFrame(al, 500, 500); @@ -80,7 +51,8 @@ public class 
FeaturesFileTest .getFeatureColours(); FeaturesFile featuresFile = new FeaturesFile( "examples/exampleFeatures.txt", FormatAdapter.FILE); - assertTrue("Test " + testname + "\nFailed to parse features file.", + assertTrue("Test " + "Features file test" + + "\nFailed to parse features file.", featuresFile.parse(al.getDataset(), colours, true)); /* @@ -149,4 +121,202 @@ public class FeaturesFileTest assertEquals("netphos", sf.featureGroup); assertEquals("PHOSPHORYLATION (T)", sf.type); } + + /** + * Test parsing a features file with a mix of Jalview and GFF formatted + * content + * + * @throws Exception + */ + @Test(groups = { "Functional" }) + public void testParse_mixedJalviewGff() throws Exception + { + File f = new File("examples/uniref50.fa"); + AlignmentI al = readAlignmentFile(f); + AlignFrame af = new AlignFrame(al, 500, 500); + Map colours = af.getFeatureRenderer() + .getFeatureColours(); + String gffData = "METAL\tcc9900\n" + "GFF\n" + + "FER_CAPAA\tuniprot\tMETAL\t44\t45\t4.0\t.\t.\n" + + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t2.0\t.\t."; + FeaturesFile featuresFile = new FeaturesFile(gffData, + FormatAdapter.PASTE); + assertTrue("Failed to parse features file", + featuresFile.parse(al.getDataset(), colours, true)); + + // verify colours read or synthesized + colours = af.getFeatureRenderer().getFeatureColours(); + assertEquals("1 feature group colours not found", 1, colours.size()); + assertEquals(colours.get("METAL"), new Color(0xcc9900)); + + // verify feature on FER_CAPAA + SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence() + .getSequenceFeatures(); + assertEquals(1, sfs.length); + SequenceFeature sf = sfs[0]; + assertEquals("uniprot", sf.description); + assertEquals(44, sf.begin); + assertEquals(45, sf.end); + assertEquals("uniprot", sf.featureGroup); + assertEquals("METAL", sf.type); + assertEquals(4f, sf.getScore(), 0.001f); + + // verify feature on FER1_SOLLC + sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures(); + 
assertEquals(1, sfs.length); + sf = sfs[0]; + assertEquals("uniprot", sf.description); + assertEquals(55, sf.begin); + assertEquals(130, sf.end); + assertEquals("uniprot", sf.featureGroup); + assertEquals("Pfam", sf.type); + assertEquals(2f, sf.getScore(), 0.001f); + } + + public static AlignmentI readAlignmentFile(File f) throws IOException + { + System.out.println("Reading file: " + f); + String ff = f.getPath(); + FormatAdapter rf = new FormatAdapter(); + + AlignmentI al = rf.readFile(ff, FormatAdapter.FILE, + new IdentifyFile().identify(ff, FormatAdapter.FILE)); + + al.setDataset(null); // creates dataset sequences + assertNotNull("Couldn't read supplied alignment data.", al); + return al; + } + + /** + * Test various ways of describing a feature colour scheme + * + * @throws Exception + */ + @Test(groups = { "Functional" }) + public void testParseGraduatedColourScheme() throws Exception + { + FeaturesFile ff = new FeaturesFile(); + + // colour by label: + GraduatedColor gc = ff.parseGraduatedColourScheme( + "BETA-TURN-IR\t9a6a94", "label"); + assertTrue(gc.isColourByLabel()); + assertEquals(Color.white, gc.getMinColor()); + assertEquals(Color.black, gc.getMaxColor()); + assertTrue(gc.isAutoScale()); + + // using colour name, rgb, etc: + String spec = "blue|255,0,255|absolute|20.0|95.0|below|66.0"; + gc = ff.parseGraduatedColourScheme("BETA-TURN-IR\t" + spec, spec); + assertFalse(gc.isColourByLabel()); + assertEquals(Color.blue, gc.getMinColor()); + assertEquals(new Color(255, 0, 255), gc.getMaxColor()); + assertFalse(gc.isAutoScale()); + assertFalse(gc.getTolow()); + assertEquals(20.0f, gc.getMin(), 0.001f); + assertEquals(95.0f, gc.getMax(), 0.001f); + assertEquals(AnnotationColourGradient.BELOW_THRESHOLD, + gc.getThreshType()); + assertEquals(66.0f, gc.getThresh(), 0.001f); + + // inverse gradient high to low: + spec = "blue|255,0,255|95.0|20.0|below|66.0"; + gc = ff.parseGraduatedColourScheme("BETA-TURN-IR\t" + spec, spec); + assertTrue(gc.isAutoScale()); + 
assertTrue(gc.getTolow()); + } + + /** + * Test parsing a features file with GFF formatted content only + * + * @throws Exception + */ + @Test(groups = { "Functional" }) + public void testParse_pureGff() throws Exception + { + File f = new File("examples/uniref50.fa"); + AlignmentI al = readAlignmentFile(f); + AlignFrame af = new AlignFrame(al, 500, 500); + Map colours = af.getFeatureRenderer() + .getFeatureColours(); + String gffData = "##gff-version 2\n" + + "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t" + + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n" + + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t."; + FeaturesFile featuresFile = new FeaturesFile(gffData, + FormatAdapter.PASTE); + assertTrue("Failed to parse features file", + featuresFile.parse(al.getDataset(), colours, true)); + + // verify feature on FER_CAPAA + SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence() + .getSequenceFeatures(); + assertEquals(1, sfs.length); + SequenceFeature sf = sfs[0]; + // description parsed from Note attribute + assertEquals("Iron-sulfur (2Fe-2S); another note", sf.description); + assertEquals(39, sf.begin); + assertEquals(39, sf.end); + assertEquals("uniprot", sf.featureGroup); + assertEquals("METAL", sf.type); + assertEquals( + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465", + sf.getValue("ATTRIBUTES")); + + // verify feature on FER1_SOLLC1 + sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures(); + assertEquals(1, sfs.length); + sf = sfs[0]; + assertEquals("uniprot", sf.description); + assertEquals(55, sf.begin); + assertEquals(130, sf.end); + assertEquals("uniprot", sf.featureGroup); + assertEquals("Pfam", sf.type); + assertEquals(3f, sf.getScore(), 0.001f); + } + + /** + * Test parsing a features file with Jalview format features (but no colour + * descriptors or startgroup to give the hint not to parse as GFF) + * + * @throws Exception + */ + 
@Test(groups = { "Functional" }) + public void testParse_jalviewFeaturesOnly() throws Exception + { + File f = new File("examples/uniref50.fa"); + AlignmentI al = readAlignmentFile(f); + AlignFrame af = new AlignFrame(al, 500, 500); + Map colours = af.getFeatureRenderer() + .getFeatureColours(); + + /* + * one feature on FER_CAPAA and one on sequence 3 (index 2) FER1_SOLLC + */ + String featureData = "Iron-sulfur (2Fe-2S)\tFER_CAPAA\t-1\t39\t39\tMETAL\n" + + "Iron-phosphorus (2Fe-P)\tID_NOT_SPECIFIED\t2\t86\t87\tMETALLIC\n"; + FeaturesFile featuresFile = new FeaturesFile(featureData, + FormatAdapter.PASTE); + assertTrue("Failed to parse features file", + featuresFile.parse(al.getDataset(), colours, true)); + + // verify FER_CAPAA feature + SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence() + .getSequenceFeatures(); + assertEquals(1, sfs.length); + SequenceFeature sf = sfs[0]; + assertEquals("Iron-sulfur (2Fe-2S)", sf.description); + assertEquals(39, sf.begin); + assertEquals(39, sf.end); + assertEquals("METAL", sf.type); + + // verify FER1_SOLLC feature + sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures(); + assertEquals(1, sfs.length); + sf = sfs[0]; + assertEquals("Iron-phosphorus (2Fe-P)", sf.description); + assertEquals(86, sf.begin); + assertEquals(87, sf.end); + assertEquals("METALLIC", sf.type); + } } diff --git a/test/jalview/io/FileIOTester.java b/test/jalview/io/FileIOTester.java index fef7173..cde1cbc 100644 --- a/test/jalview/io/FileIOTester.java +++ b/test/jalview/io/FileIOTester.java @@ -69,7 +69,7 @@ public class FileIOTester { AssertJUnit.assertTrue("Couldn't resolve " + src + " as a valid file", fp.isValid()); - String type = new IdentifyFile().Identify(fp); + String type = new IdentifyFile().identify(fp); AssertJUnit.assertTrue("Data from '" + src + "' Expected to be '" + fmt + "' identified as '" + type + "'", type.equalsIgnoreCase(fmt)); } diff --git a/test/jalview/io/Gff3tests.java 
b/test/jalview/io/GffFileTest.java similarity index 81% rename from test/jalview/io/Gff3tests.java rename to test/jalview/io/GffFileTest.java index 3403f10..8e587b1 100644 --- a/test/jalview/io/Gff3tests.java +++ b/test/jalview/io/GffFileTest.java @@ -36,12 +36,12 @@ import java.io.IOException; import org.testng.annotations.Test; -public class Gff3tests +public class GffFileTest { private static String exonerateSeqs = "examples/testdata/exonerateseqs.fa", exonerateOutput = "examples/testdata/exonerateoutput.gff", - simpleGff3file = "examples/testdata/simpleGff3.gff"; + simpleGffFile = "examples/testdata/simpleGff3.gff"; @Test(groups = { "Functional" }) public void testExonerateImport() @@ -61,26 +61,25 @@ public class Gff3tests assertTrue("Expected at least one DNA protein association", 0 != af .getViewport().getAlignment().getDataset().getCodonFrames() - .size() - ); - + .size()); } @Test(groups = { "Functional" }) public void simpleGff3FileIdentify() { - assertEquals("Didn't recognise file correctly.", IdentifyFile.GFF3File, - new IdentifyFile().Identify(simpleGff3file, FormatAdapter.FILE)); + assertEquals("Didn't recognise file correctly.", + IdentifyFile.FeaturesFile, + new IdentifyFile().identify(simpleGffFile, FormatAdapter.FILE)); } @Test(groups = { "Functional" }) public void simpleGff3FileClass() throws IOException { AlignmentI dataset = new Alignment(new SequenceI[] {}); - FeaturesFile ffile = new FeaturesFile(simpleGff3file, + FeaturesFile ffile = new FeaturesFile(simpleGffFile, FormatAdapter.FILE); - boolean parseResult = ffile.parse(dataset, null, null, false, false); + boolean parseResult = ffile.parse(dataset, null, false, false); assertTrue("return result should be true", parseResult); checkDatasetfromSimpleGff3(dataset); } @@ -89,21 +88,22 @@ public class Gff3tests public void simpleGff3FileLoader() throws IOException { AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded( - simpleGff3file, FormatAdapter.FILE); + simpleGffFile, 
FormatAdapter.FILE); assertTrue( "Didn't read the alignment into an alignframe from Gff3 File", af != null); - checkDatasetfromSimpleGff3(af.getViewport().getAlignment().getDataset()); + // FIXME codon mappings are on the alignment but not on the dataset + checkDatasetfromSimpleGff3(af.getViewport().getAlignment()/* .getDataset() */); } @Test(groups = { "Functional" }) public void simpleGff3RelaxedIdMatching() throws IOException { AlignmentI dataset = new Alignment(new SequenceI[] {}); - FeaturesFile ffile = new FeaturesFile(simpleGff3file, + FeaturesFile ffile = new FeaturesFile(simpleGffFile, FormatAdapter.FILE); - boolean parseResult = ffile.parse(dataset, null, null, false, true); + boolean parseResult = ffile.parse(dataset, null, false, true); assertTrue("return result (relaxedID matching) should be true", parseResult); checkDatasetfromSimpleGff3(dataset); @@ -112,11 +112,11 @@ public class Gff3tests @Test(groups = { "Functional" }) public void readGff3File() throws IOException { - Gff3File gff3reader = new Gff3File(simpleGff3file, FormatAdapter.FILE); - Alignment dataset = new Alignment(gff3reader.getSeqsAsArray()); - gff3reader.addProperties(dataset); + FeaturesFile gffreader = new FeaturesFile(true, simpleGffFile, + FormatAdapter.FILE); + Alignment dataset = new Alignment(gffreader.getSeqsAsArray()); + gffreader.addProperties(dataset); checkDatasetfromSimpleGff3(dataset); - } private void checkDatasetfromSimpleGff3(AlignmentI dataset) @@ -156,16 +156,4 @@ public class Gff3tests && dataset.getCodonFrame(seq1).size() > 0); } - // @Test(groups ={ "Functional" }) - // public final void testPrintGFFFormatSequenceIArrayMapOfStringObject() - // { - // fail("Not yet implemented"); - // } - // - // @Test(groups ={ "Functional" }) - // public final void testAlignFileBooleanStringString() - // { - // fail("Not yet implemented"); - // } - } diff --git a/test/jalview/io/IdentifyFileTest.java b/test/jalview/io/IdentifyFileTest.java index c958ff0..5e376a6 100644 --- 
a/test/jalview/io/IdentifyFileTest.java +++ b/test/jalview/io/IdentifyFileTest.java @@ -20,6 +20,9 @@ */ package jalview.io; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertTrue; + import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -32,7 +35,7 @@ public class IdentifyFileTest { String protocol = AppletFormatAdapter.FILE; IdentifyFile ider = new IdentifyFile(); - String actualFiletype = ider.Identify(data, protocol); + String actualFiletype = ider.identify(data, protocol); Assert.assertEquals(actualFiletype, expectedFileType, "File identification Failed!"); } @@ -54,7 +57,10 @@ public class IdentifyFileTest { "examples/testdata/test.html", "HTML" }, { "examples/testdata/test.pileup", "PileUp" }, { "examples/testdata/test.blc", "BLC" }, - { "examples/testdata/simplegff3.gff", "GFF v2 or v3" }, + { "examples/exampleFeatures.txt", IdentifyFile.FeaturesFile }, + { "examples/testdata/simplegff3.gff", IdentifyFile.FeaturesFile }, + { "examples/testdata/exampleFeaturesMixed.gff", + IdentifyFile.FeaturesFile }, { "examples/testdata/test.jvp", "Jalview" }, { "examples/testdata/cullpdb_pc25_res3.0_R0.3_d150729_chains9361.fasta.15316", @@ -65,4 +71,22 @@ public class IdentifyFileTest }; } + @Test(groups = "Functional") + public void testLooksLikeFeatureData() + { + IdentifyFile id = new IdentifyFile(); + assertFalse(id.looksLikeFeatureData(null)); + assertFalse(id.looksLikeFeatureData("")); + // too few columns: + assertFalse(id.looksLikeFeatureData("1 \t 2 \t 3 \t 4 \t 5")); + // GFF format: + assertTrue(id + .looksLikeFeatureData("Seq1\tlocal\tHelix\t2456\t2462\tss")); + // Jalview format: + assertTrue(id.looksLikeFeatureData("Helix\tSeq1\t-1\t2456\t2462\tss")); + // non-numeric start column: + assertFalse(id.looksLikeFeatureData("Helix\tSeq1\t-1\t.\t2462\tss")); + // non-numeric end column: + assertFalse(id.looksLikeFeatureData("Helix\tSeq1\t-1\t2456\t.\tss")); + } } 
diff --git a/test/jalview/io/StockholmFileTest.java b/test/jalview/io/StockholmFileTest.java index e889837..d7a9166 100644 --- a/test/jalview/io/StockholmFileTest.java +++ b/test/jalview/io/StockholmFileTest.java @@ -54,7 +54,7 @@ public class StockholmFileTest { AppletFormatAdapter af = new AppletFormatAdapter(); AlignmentI al = af.readFile(PfamFile, af.FILE, - new IdentifyFile().Identify(PfamFile, af.FILE)); + new IdentifyFile().identify(PfamFile, af.FILE)); int numpdb = 0; for (SequenceI sq : al.getSequences()) { @@ -95,7 +95,7 @@ public class StockholmFileTest AppletFormatAdapter rf = new AppletFormatAdapter(); AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE, - new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE)); + new IdentifyFile().identify(ff, AppletFormatAdapter.FILE)); assertNotNull("Couldn't read supplied alignment data.", al); @@ -112,7 +112,7 @@ public class StockholmFileTest AppletFormatAdapter.PASTE, ioformat); assertNotNull("Couldn't parse reimported alignment data.", al_input); - String identifyoutput = new IdentifyFile().Identify(outputfile, + String identifyoutput = new IdentifyFile().identify(outputfile, AppletFormatAdapter.PASTE); assertNotNull("Identify routine failed for outputformat " + ioformat, identifyoutput); diff --git a/test/jalview/util/StringUtilsTest.java b/test/jalview/util/StringUtilsTest.java index 2342afe..0b776d1 100644 --- a/test/jalview/util/StringUtilsTest.java +++ b/test/jalview/util/StringUtilsTest.java @@ -24,7 +24,10 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertTrue; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.Map; import org.testng.annotations.Test; @@ -128,4 +131,79 @@ public class StringUtilsTest assertEquals("a*b*c*cde", StringUtils.arrayToSeparatorList(new String[] { "a", "b*c", "cde" }, "*")); } + + /** + * Test the method that parses lines like
+ * ID=2345;Name=Something; + */ + @Test(groups = { "Functional" }) + public void testParseNameValuePairs() + { + char[] separators = new char[] { ' ' }; + assertTrue(StringUtils.parseNameValuePairs(null, ";", separators) + .isEmpty()); + assertTrue(StringUtils.parseNameValuePairs("", ";", separators) + .isEmpty()); + assertTrue(StringUtils.parseNameValuePairs("hello=world", ";", + separators).isEmpty()); + + Map> map = StringUtils.parseNameValuePairs( + "hello world", ";", separators); + assertEquals(1, map.size()); + assertEquals(1, map.get("hello").size()); + assertEquals("world", map.get("hello").get(0)); + + separators = new char[] { ' ', '=' }; + map = StringUtils + .parseNameValuePairs( + "Method= manual curation ;nothing; Notes F2=S ; Notes=Metal; Type=", + ";", separators); + + // Type is ignored as no value was supplied + assertEquals(2, map.size()); + + // equals separator used ahead of space separator: + assertEquals(1, map.get("Method").size()); + assertEquals("manual curation", map.get("Method").get(0)); // trimmed + + assertEquals(2, map.get("Notes").size()); + // space separator used ahead of equals separator + assertEquals("F2=S", map.get("Notes").get(0)); + assertEquals("Metal", map.get("Notes").get(1)); + } + + @Test(groups = { "Functional" }) + public void testListToDelimitedString() + { + assertEquals("", StringUtils.listToDelimitedString(null, ";")); + List list = new ArrayList(); + assertEquals("", StringUtils.listToDelimitedString(list, ";")); + list.add("now"); + assertEquals("now", StringUtils.listToDelimitedString(list, ";")); + list.add("is"); + assertEquals("now;is", StringUtils.listToDelimitedString(list, ";")); + assertEquals("now ; is", StringUtils.listToDelimitedString(list, " ; ")); + list.add("the"); + list.add("winter"); + list.add("of"); + list.add("our"); + list.add("discontent"); + assertEquals("now is the winter of our discontent", + StringUtils.listToDelimitedString(list, " ")); + } + + @Test(groups = { "Functional" }) + 
public void testParseInt() + { + assertEquals(0, StringUtils.parseInt(null)); + assertEquals(0, StringUtils.parseInt("")); + assertEquals(0, StringUtils.parseInt("x")); + assertEquals(0, StringUtils.parseInt("1.2")); + assertEquals(33, StringUtils.parseInt("33")); + assertEquals(33, StringUtils.parseInt("+33")); + assertEquals(-123, StringUtils.parseInt("-123")); + // too big for an int: + assertEquals(0, + StringUtils.parseInt(String.valueOf(Integer.MAX_VALUE) + "1")); + } }