Merge branch 'develop' into JAL-1705_trialMerge
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 28 Jan 2016 12:07:40 +0000 (12:07 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 28 Jan 2016 12:07:40 +0000 (12:07 +0000)
Conflicts:
src/jalview/analysis/AlignmentUtils.java
src/jalview/analysis/CrossRef.java
src/jalview/structure/StructureSelectionManager.java
src/jalview/ws/dbsources/Uniprot.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/util/MappingUtilsTest.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java

149 files changed:
.classpath
.settings/org.eclipse.jdt.core.prefs
THIRDPARTYLIBS
examples/exampleFeatures.txt
examples/testdata/exonerateoutput.gff
examples/testdata/simpleGff3.gff [new file with mode: 0644]
examples/testdata/simplegff3.gff [deleted file]
help/html/features/featuresFormat.html
lib/biojava-core-4.1.0.jar [new file with mode: 0644]
lib/biojava-ontology-4.1.0.jar [new file with mode: 0644]
lib/htsjdk-1.133.jar [new file with mode: 0644]
resources/lang/Messages.properties
resources/so-xp-simple.obo.zip [new file with mode: 0644]
src/jalview/analysis/AAFrequency.java
src/jalview/analysis/AlignmentUtils.java
src/jalview/analysis/CrossRef.java
src/jalview/analysis/Dna.java
src/jalview/analysis/SequenceIdMatcher.java
src/jalview/api/FeaturesSourceI.java [new file with mode: 0644]
src/jalview/appletgui/AlignFrame.java
src/jalview/appletgui/CutAndPasteTransfer.java
src/jalview/appletgui/FeatureRenderer.java
src/jalview/appletgui/FeatureSettings.java
src/jalview/bin/Jalview.java
src/jalview/bin/JalviewLite.java
src/jalview/bin/JalviewLiteURLRetrieve.java
src/jalview/controller/AlignViewController.java
src/jalview/datamodel/AlignedCodonFrame.java
src/jalview/datamodel/Alignment.java
src/jalview/datamodel/AlignmentI.java
src/jalview/datamodel/DBRefSource.java
src/jalview/datamodel/Mapping.java
src/jalview/datamodel/MappingType.java [new file with mode: 0644]
src/jalview/datamodel/SearchResults.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceDummy.java
src/jalview/datamodel/SequenceFeature.java
src/jalview/ext/ensembl/EnsemblCdna.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblCds.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblGene.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblGenome.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblOverlap.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblProtein.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblRestClient.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblSeqProxy.java [new file with mode: 0644]
src/jalview/ext/ensembl/EnsemblSequenceFetcher.java [new file with mode: 0644]
src/jalview/ext/htsjdk/HtsContigDb.java [new file with mode: 0644]
src/jalview/gui/AlignFrame.java
src/jalview/gui/AlignViewport.java
src/jalview/gui/AnnotationExporter.java
src/jalview/gui/CutAndPasteTransfer.java
src/jalview/gui/Desktop.java
src/jalview/gui/FeatureColourChooser.java
src/jalview/gui/JDatabaseTree.java
src/jalview/gui/Jalview2XML.java
src/jalview/gui/SequenceFetcher.java
src/jalview/gui/SplitFrame.java
src/jalview/io/AlignFile.java
src/jalview/io/AppletFormatAdapter.java
src/jalview/io/FeaturesFile.java
src/jalview/io/FileLoader.java
src/jalview/io/FileParse.java
src/jalview/io/Gff3File.java [deleted file]
src/jalview/io/IdentifyFile.java
src/jalview/io/VamsasAppDatastore.java
src/jalview/io/gff/ExonerateHelper.java [new file with mode: 0644]
src/jalview/io/gff/Gff2Helper.java [new file with mode: 0644]
src/jalview/io/gff/Gff3Helper.java [new file with mode: 0644]
src/jalview/io/gff/GffHelperBase.java [new file with mode: 0644]
src/jalview/io/gff/GffHelperFactory.java [new file with mode: 0644]
src/jalview/io/gff/GffHelperI.java [new file with mode: 0644]
src/jalview/io/gff/InterProScanHelper.java [new file with mode: 0644]
src/jalview/io/gff/SequenceOntology.java [new file with mode: 0644]
src/jalview/io/packed/JalviewDataset.java
src/jalview/io/packed/ParsePackedSet.java
src/jalview/io/vamsas/Sequencefeature.java
src/jalview/jbgui/GAlignFrame.java
src/jalview/schemes/ResidueProperties.java
src/jalview/schemes/UserColourScheme.java
src/jalview/structure/StructureSelectionManager.java
src/jalview/util/DBRefUtils.java
src/jalview/util/MapList.java
src/jalview/util/MappingUtils.java
src/jalview/util/StringUtils.java
src/jalview/viewmodel/AlignmentViewport.java
src/jalview/ws/AWSThread.java
src/jalview/ws/DBRefFetcher.java
src/jalview/ws/SequenceFetcher.java
src/jalview/ws/dbsources/EmblCdsSource.java [moved from src/jalview/ws/dbsources/EmblCdsSouce.java with 89% similarity]
src/jalview/ws/dbsources/EmblSource.java
src/jalview/ws/dbsources/EmblXmlSource.java
src/jalview/ws/dbsources/GeneDbSource.java
src/jalview/ws/dbsources/Pdb.java
src/jalview/ws/dbsources/Pfam.java
src/jalview/ws/dbsources/PfamFull.java
src/jalview/ws/dbsources/PfamSeed.java
src/jalview/ws/dbsources/Rfam.java
src/jalview/ws/dbsources/RfamFull.java
src/jalview/ws/dbsources/RfamSeed.java
src/jalview/ws/dbsources/Uniprot.java
src/jalview/ws/dbsources/UniprotName.java [moved from src/jalview/ws/dbsources/UnprotName.java with 88% similarity]
src/jalview/ws/dbsources/Xfam.java
src/jalview/ws/jws1/JPredThread.java
src/jalview/ws/jws1/SeqSearchWSThread.java
src/jalview/ws/seqfetcher/ASequenceFetcher.java
src/jalview/ws/seqfetcher/DbSourceProxy.java
src/jalview/ws/seqfetcher/DbSourceProxyImpl.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/analysis/DnaTest.java
test/jalview/datamodel/AlignedCodonFrameTest.java
test/jalview/datamodel/AlignmentTest.java
test/jalview/datamodel/MappingTest.java
test/jalview/datamodel/MappingTypeTest.java [new file with mode: 0644]
test/jalview/datamodel/SearchResultsTest.java
test/jalview/datamodel/SequenceDummyTest.java
test/jalview/datamodel/SequenceFeatureTest.java [new file with mode: 0644]
test/jalview/datamodel/SequenceTest.java
test/jalview/ext/ensembl/ENSG00000157764.gff [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblRestClientTest.java [new file with mode: 0644]
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java [new file with mode: 0644]
test/jalview/ext/htsjdk/TestHtsContigDb.java [new file with mode: 0644]
test/jalview/ext/htsjdk/pgmB.fasta [new file with mode: 0644]
test/jalview/ext/htsjdk/pgmB.fasta.fai [new file with mode: 0644]
test/jalview/ext/jmol/JmolCommandsTest.java [new file with mode: 0644]
test/jalview/gui/AlignViewportTest.java
test/jalview/io/AnnotatedPDBFileInputTest.java
test/jalview/io/AnnotationFileIOTest.java
test/jalview/io/FeaturesFileTest.java
test/jalview/io/FileIOTester.java
test/jalview/io/Gff3tests.java [deleted file]
test/jalview/io/HtmlFileTest.java
test/jalview/io/IdentifyFileTest.java
test/jalview/io/StockholmFileTest.java
test/jalview/io/gff/ExonerateHelperTest.java [new file with mode: 0644]
test/jalview/io/gff/Gff3HelperTest.java [new file with mode: 0644]
test/jalview/io/gff/GffHelperBaseTest.java [new file with mode: 0644]
test/jalview/io/gff/GffHelperFactoryTest.java [new file with mode: 0644]
test/jalview/io/gff/GffTests.java [new file with mode: 0644]
test/jalview/io/gff/InterProScanHelperTest.java [new file with mode: 0644]
test/jalview/io/gff/SequenceOntologyTest.java [new file with mode: 0644]
test/jalview/structure/StructureSelectionManagerTest.java
test/jalview/util/DBRefUtilsTest.java
test/jalview/util/MapListTest.java
test/jalview/util/MappingUtilsTest.java
test/jalview/util/StringUtilsTest.java
test/jalview/ws/SequenceFetcherTest.java [new file with mode: 0644]
test/jalview/ws/jabaws/JalviewJabawsTestUtils.java
test/jalview/ws/seqfetcher/DbRefFetcherTest.java
utils/InstallAnywhere/Jalview.iap_xml

index 473d937..cad9e2b 100644 (file)
@@ -49,6 +49,7 @@
        <classpathentry kind="lib" path="lib/VARNAv3-93.jar"/>
        <classpathentry kind="lib" path="lib/jfreesvg-2.1.jar"/>
        <classpathentry kind="lib" path="lib/quaqua-filechooser-only-8.0.jar"/>
+       <classpathentry kind="lib" path="lib/htsjdk-1.133.jar"/>
        <classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/plugin"/>
        <classpathentry kind="lib" path="lib/xml-apis.jar"/>
        <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
@@ -66,5 +67,7 @@
        <classpathentry kind="lib" path="lib/java-json.jar"/>
        <classpathentry kind="lib" path="lib/Jmol-14.2.14_2015.06.11.jar"/>
        <classpathentry kind="con" path="org.testng.TESTNG_CONTAINER"/>
+       <classpathentry kind="lib" path="lib/biojava-core-4.1.0.jar"/>
+       <classpathentry kind="lib" path="lib/biojava-ontology-4.1.0.jar"/>
        <classpathentry kind="output" path="classes"/>
 </classpath>
index 3f8ca28..8a5e7a7 100644 (file)
@@ -21,7 +21,7 @@ org.eclipse.jdt.core.formatter.alignment_for_assignment=0
 org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
 org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
 org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
-org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=16
 org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
 org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
 org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
@@ -60,7 +60,7 @@ org.eclipse.jdt.core.formatter.brace_position_for_switch=next_line
 org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=next_line
 org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
 org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
-org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_block_comments=false
 org.eclipse.jdt.core.formatter.comment.format_header=false
 org.eclipse.jdt.core.formatter.comment.format_html=true
 org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
index 3094939..c6c817a 100644 (file)
@@ -45,6 +45,9 @@ jfreesvg-2.1.jar : GPL v3 licensed library from the JFree suite: http://www.jfre
 
 quaqua: v.8.0 (latest stable) by Randel S Hofer. LGPL and BSD Modified license: downloaded from http://www.randelshofer.ch/quaqua/ 
 
+lib/htsjdk-1.133.jar: built from maven master at https://github.com/samtools/htsjdk MIT License to Broad Institute
+
+
 Additional dependencies
 
 examples/javascript/deployJava.js : http://java.com/js/deployJava.js
index 0bb8b7e..dfadb50 100755 (executable)
@@ -1,23 +1,5 @@
-#-------------------------------------------------------------------------------
-# Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
-# Copyright (C) $$Year-Rel$$ The Jalview Authors
-# 
-# This file is part of Jalview.
-# 
-# Jalview is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License 
-# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-#  
-# Jalview is distributed in the hope that it will be useful, but 
-# WITHOUT ANY WARRANTY; without even the implied warranty 
-# of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
-# PURPOSE.  See the GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
-# The Jalview Authors are detailed in the 'AUTHORS' file.
-#-------------------------------------------------------------------------------
-ST-TURN-IIL    705b23
-GAMMA-TURN-CLASSIC     788763
+ST-TURN-IIL    blue|255,0,255|absolute|20.0|95.0|below|66.0
+GAMMA-TURN-CLASSIC             red|0,255,255|20.0|95.0|below|66.0
 BETA-TURN-IR   9a6a94
 BETA-TURN-IL   d6a6ca
 BETA-BULGE     1dc451
index 3ea68dc..d3b5f9b 100644 (file)
@@ -1,3 +1,7 @@
+# (exonerate delimits GFF with [START|END] OF GFF DUMP)
+# --- START OF GFF DUMP ---
+#
+#
 ##gff-version 2
 ##source-version exonerate:protein2genome:local 2.2.0
 ##date 2015-01-16
@@ -9,5 +13,8 @@
 contig_1146    exonerate:protein2genome:local  gene    8534    11269   3652    -       .       gene_id 0 ; sequence DDB_G0269124 ; gene_orientation .
 contig_1146    exonerate:protein2genome:local  cds     8534    11269   .       -       .       
 contig_1146    exonerate:protein2genome:local  exon    8534    11269   .       -       .       insertions 3 ; deletions 6
+#TODO need to understand why the GFF feature is from 11269 but Align is from 11270
 contig_1146    exonerate:protein2genome:local  similarity      8534    11269   3652    -       .       alignment_id 0 ; Query DDB_G0269124 ; Align 11270 143 120 ; Align 11150 187 282 ; Align 10865 281 888 ; Align 9977 578 1068 ; Align 8909 935 375
+# and a made-up alignment to a sequence in exonerateseqs.fa
+contig_1146    exonerate:protein2genome:local  similarity      8534    11269   3652    -       .       alignment_id 0 ; Query DDB_G0280897 ; Align 11270 143 120 
 # --- END OF GFF DUMP ---
diff --git a/examples/testdata/simpleGff3.gff b/examples/testdata/simpleGff3.gff
new file mode 100644 (file)
index 0000000..d363bae
--- /dev/null
@@ -0,0 +1,28 @@
+##gff-version 2
+# exonerate output in gff2 format; not gff3 because
+#   - 'similarity' is not a Sequence Ontology term
+#   - attributes' name/values are separated by space ' ' not equals '='
+##source-version exonerate:protein2genome:local 2.2.0
+##date 2015-01-16
+##type DNA
+#
+# exonerate run with --showtargetgff generates 'features on the target' i.e. mappings to the query
+# tab-delimited
+# seqname source feature start end score strand frame attributes
+#
+seq1   exonerate:protein2genome:local  gene    8       11      3652    -       .       gene_id 0 ; sequence seq2 ; gene_orientation .
+seq1   exonerate:protein2genome:local  cds     9       11      .       -       .       
+seq1   exonerate:protein2genome:local  exon    9       11      .       -       .       insertions 3 ; deletions 6
+#seq1  exonerate:protein2genome:local  similarity      8       11      3652    -       .       alignment_id 0 ; Query seq2 ; Align 11 1 3
+seq1   exonerate:protein2genome:local  similarity      9       11      3652    -       .       alignment_id 0 ; Query seq2 ; Align 11 1 3
+#
+# appending FASTA sequences is strictly a GFF3 format feature
+# but Jalview is able to handle this mixture of GFF2 / GFF3 :-)
+#
+##FASTA
+>seq1
+ACTACGACACGACGACGACGACG
+>seq2
+CDEQEATGTQDAQEQAQC
+
+
diff --git a/examples/testdata/simplegff3.gff b/examples/testdata/simplegff3.gff
deleted file mode 100644 (file)
index 2ac5421..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-##gff-version 2
-##source-version exonerate:protein2genome:local 2.2.0
-##date 2015-01-16
-##type DNA
-#
-#
-# seqname source feature start end score strand frame attributes
-#
-seq1   exonerate:protein2genome:local  gene    8       11      3652    -       .       gene_id 0 ; sequence seq2 ; gene_orientation .
-seq1   exonerate:protein2genome:local  cds     9       11      .       -       .       
-seq1   exonerate:protein2genome:local  exon    9       11      .       -       .       insertions 3 ; deletions 6
-seq1   exonerate:protein2genome:local  similarity      8       11      3652    -       .       alignment_id 0 ; Query seq2 ; Align 11 1 3
-##FASTA
->seq1
-ACTACGACACGACGACGACGACG
->seq2
-CDEQEATGTQDAQEQAQC
-
-
index 84bc5d4..9f33b7b 100755 (executable)
@@ -83,7 +83,7 @@
         <li><em>label</em><br> Indicate that the feature
           description should be used to create a colour for features of
           this type.<br> <em>Note: if no threshold value is
-            needed then the final '|' may be ommitted.<br> This
+            needed then the final '|' may be omitted.<br> This
             keyword was added in Jalview 2.6
         </em></li>
 
 
   <p>
     If your sequence annotation is already available in GFF Format (see
-    <a href="http://www.sanger.ac.uk/resources/software/gff/spec.html">http://www.sanger.ac.uk/resources/software/gff/spec.html</a>),
+    <a href="http://gmod.org/wiki/GFF2">gmod.org/wiki/GFF2</a>),
     then you can leave it as is, after first adding a line containing
     only 'GFF' after any Jalview feature colour definitions (<em>this
       mixed format capability was added in Jalview 2.6</em>). Alternately,
 </pre>
 
   This format allows two alternate ways of referring to a sequence,
-  either by its text ID, or its index in an associated alignment.
+  either by its text ID, or its index (base 0) in an associated alignment.
   Normally, sequence features are associated with sequences rather than
   alignments, and the sequenceIndex field is given as &quot;-1&quot;. In
   order to specify a sequence by its index in a particular alignment,
diff --git a/lib/biojava-core-4.1.0.jar b/lib/biojava-core-4.1.0.jar
new file mode 100644 (file)
index 0000000..5a09c1f
Binary files /dev/null and b/lib/biojava-core-4.1.0.jar differ
diff --git a/lib/biojava-ontology-4.1.0.jar b/lib/biojava-ontology-4.1.0.jar
new file mode 100644 (file)
index 0000000..80737d5
Binary files /dev/null and b/lib/biojava-ontology-4.1.0.jar differ
diff --git a/lib/htsjdk-1.133.jar b/lib/htsjdk-1.133.jar
new file mode 100644 (file)
index 0000000..f084258
Binary files /dev/null and b/lib/htsjdk-1.133.jar differ
index ec5f592..876b815 100644 (file)
@@ -704,6 +704,8 @@ label.load_tree_for_sequence_set = Load a tree for this sequence set
 label.export_image = Export Image
 label.vamsas_store = VAMSAS store
 label.translate_cDNA = Translate as cDNA
+label.reverse = Reverse
+label.reverse_complement = Reverse Complement
 label.linked_view_title = Linked cDNA and protein view
 label.align = Align
 label.extract_scores = Extract Scores
@@ -1281,4 +1283,4 @@ exception.pdb_server_error = There seems to be an error from the PDB server
 exception.pdb_server_unreachable = Jalview is unable to reach the PDBe Solr server. \nPlease ensure that you are connected to the internet and try again.
 label.nw_mapping = Needleman & Wunsch Alignment
 label.sifts_mapping = SIFTs Mapping
-label.mapping_method = Sequence \u27f7 Structure mapping method
\ No newline at end of file
+label.mapping_method = Sequence \u27f7 Structure mapping method
diff --git a/resources/so-xp-simple.obo.zip b/resources/so-xp-simple.obo.zip
new file mode 100644 (file)
index 0000000..d150da0
Binary files /dev/null and b/resources/so-xp-simple.obo.zip differ
index 5227795..3d61b11 100755 (executable)
@@ -32,7 +32,6 @@ import jalview.util.QuickSort;
 import java.util.Arrays;
 import java.util.Hashtable;
 import java.util.List;
-import java.util.Set;
 
 /**
  * Takes in a vector or array of sequences and column start and column end and
@@ -520,7 +519,7 @@ public class AAFrequency
           Hashtable[] hconsensus)
   {
     final char gapCharacter = alignment.getGapCharacter();
-    Set<AlignedCodonFrame> mappings = alignment.getCodonFrames();
+    List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
     if (mappings == null || mappings.isEmpty())
     {
       return;
@@ -541,12 +540,16 @@ public class AAFrequency
         {
           continue;
         }
-        char[] codon = MappingUtils.findCodonFor(seq, col, mappings);
-        int codonEncoded = CodingUtils.encodeCodon(codon);
-        if (codonEncoded >= 0)
+        List<char[]> codons = MappingUtils
+                .findCodonsFor(seq, col, mappings);
+        for (char[] codon : codons)
         {
-          codonCounts[codonEncoded + 2]++;
-          ungappedCount++;
+          int codonEncoded = CodingUtils.encodeCodon(codon);
+          if (codonEncoded >= 0)
+          {
+            codonCounts[codonEncoded + 2]++;
+            ungappedCount++;
+          }
         }
       }
       codonCounts[1] = ungappedCount;
index da5bc2f..fe95dca 100644 (file)
@@ -31,6 +31,7 @@ import jalview.datamodel.FeatureProperties;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.SearchResults;
 import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 import jalview.schemes.ResidueProperties;
@@ -45,7 +46,6 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -338,12 +338,12 @@ public class AlignmentUtils
    * Answers true if the mappings include one between the given (dataset)
    * sequences.
    */
-  public static boolean mappingExists(Set<AlignedCodonFrame> set,
+  public static boolean mappingExists(List<AlignedCodonFrame> mappings,
           SequenceI aaSeq, SequenceI cdnaSeq)
   {
-    if (set != null)
+    if (mappings != null)
     {
-      for (AlignedCodonFrame acf : set)
+      for (AlignedCodonFrame acf : mappings)
       {
         if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))
         {
@@ -514,8 +514,8 @@ public class AlignmentUtils
 
     /*
      * Locate the aligned source sequence whose dataset sequence is mapped. We
+     * just take the first match here (as we can't align a sequence in the
+     * same way as more than one other sequence).
+     * just take the first match here (as we can't align like more than one
+     * sequence).
      */
     SequenceI alignFrom = null;
     AlignedCodonFrame mapping = null;
@@ -541,8 +541,8 @@ public class AlignmentUtils
   /**
    * Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
    * match residues and codons. Flags control whether existing gaps in unmapped
-   * (intron) and mapped (exon) regions are preserved or not. Gaps linking intro
-   * and exon are only retained if both flags are set.
+   * (intron) and mapped (exon) regions are preserved or not. Gaps between
+   * intron and exon are only retained if both flags are set.
    * 
    * @param alignTo
    * @param alignFrom
@@ -558,9 +558,6 @@ public class AlignmentUtils
           boolean preserveUnmappedGaps)
   {
     // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
-    final char[] thisSeq = alignTo.getSequence();
-    final char[] thatAligned = alignFrom.getSequence();
-    StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length);
 
     // aligned and dataset sequence positions, all base zero
     int thisSeqPos = 0;
@@ -570,13 +567,17 @@ public class AlignmentUtils
     char myGapChar = myGap.charAt(0);
     int ratio = myGap.length();
 
-    /*
-     * Traverse the aligned protein sequence.
-     */
     int fromOffset = alignFrom.getStart() - 1;
     int toOffset = alignTo.getStart() - 1;
     int sourceGapMappedLength = 0;
     boolean inExon = false;
+    final char[] thisSeq = alignTo.getSequence();
+    final char[] thatAligned = alignFrom.getSequence();
+    StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length);
+
+    /*
+     * Traverse the 'model' aligned sequence
+     */
     for (char sourceChar : thatAligned)
     {
       if (sourceChar == sourceGap)
@@ -586,7 +587,7 @@ public class AlignmentUtils
       }
 
       /*
-       * Found a residue. Locate its mapped codon (start) position.
+       * Found a non-gap character. Locate its mapped region if any.
        */
       sourceDsPos++;
       // Note mapping positions are base 1, our sequence positions base 0
@@ -595,11 +596,13 @@ public class AlignmentUtils
       if (mappedPos == null)
       {
         /*
-         * Abort realignment if unmapped protein. Or could ignore it??
+         * unmapped position; treat like a gap
          */
-        System.err.println("Can't align: no codon mapping to residue "
-                + sourceDsPos + "(" + sourceChar + ")");
-        return;
+        sourceGapMappedLength += ratio;
+        // System.err.println("Can't align: no codon mapping to residue "
+        // + sourceDsPos + "(" + sourceChar + ")");
+        // return;
+        continue;
       }
 
       int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
@@ -669,8 +672,8 @@ public class AlignmentUtils
     }
 
     /*
-     * At end of protein sequence. Copy any remaining dna sequence, optionally
-     * including (intron) gaps. We do not copy trailing gaps in protein.
+     * At end of model aligned sequence. Copy any remaining target sequence, optionally
+     * including (intron) gaps.
      */
     while (thisSeqPos < thisSeq.length)
     {
@@ -679,6 +682,20 @@ public class AlignmentUtils
       {
         thisAligned.append(c);
       }
+      sourceGapMappedLength--;
+    }
+
+    /*
+     * finally add gaps to pad for any trailing source gaps or
+     * unmapped characters
+     */
+    if (preserveUnmappedGaps)
+    {
+      while (sourceGapMappedLength > 0)
+      {
+        thisAligned.append(myGapChar);
+        sourceGapMappedLength--;
+      }
     }
 
     /*
@@ -909,7 +926,7 @@ public class AlignmentUtils
     List<SequenceI> unmappedProtein = new ArrayList<SequenceI>();
     unmappedProtein.addAll(protein.getSequences());
 
-    Set<AlignedCodonFrame> mappings = protein.getCodonFrames();
+    List<AlignedCodonFrame> mappings = protein.getCodonFrames();
 
     /*
      * Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of
@@ -1048,7 +1065,7 @@ public class AlignmentUtils
     }
     AlignmentI dna = al1.isNucleotide() ? al1 : al2;
     AlignmentI protein = dna == al1 ? al2 : al1;
-    Set<AlignedCodonFrame> mappings = protein.getCodonFrames();
+    List<AlignedCodonFrame> mappings = protein.getCodonFrames();
     for (SequenceI dnaSeq : dna.getSequences())
     {
       for (SequenceI proteinSeq : protein.getSequences())
@@ -1072,7 +1089,7 @@ public class AlignmentUtils
    * @return
    */
   protected static boolean isMappable(SequenceI dnaSeq,
-          SequenceI proteinSeq, Set<AlignedCodonFrame> mappings)
+          SequenceI proteinSeq, List<AlignedCodonFrame> mappings)
   {
     if (dnaSeq == null || proteinSeq == null)
     {
@@ -1084,13 +1101,13 @@ public class AlignmentUtils
     SequenceI proteinDs = proteinSeq.getDatasetSequence() == null ? proteinSeq
             : proteinSeq.getDatasetSequence();
 
-    /*
-     * Already mapped?
-     */
     for (AlignedCodonFrame mapping : mappings)
     {
       if (proteinDs == mapping.getAaForDnaSeq(dnaDs))
       {
+        /*
+         * already mapped
+         */
         return true;
       }
     }
@@ -1301,21 +1318,21 @@ public class AlignmentUtils
   }
 
   /**
-   * Constructs an alignment consisting of the mapped exon regions in the given
+   * Constructs an alignment consisting of the mapped cds regions in the given
    * nucleotide sequences, and updates mappings to match.
    * 
    * @param dna
    *          aligned dna sequences
    * @param mappings
    *          from dna to protein; these are replaced with new mappings
-   * @return an alignment whose sequences are the exon-only parts of the dna
-   *         sequences (or null if no exons are found)
+   * @return an alignment whose sequences are the cds-only parts of the dna
+   *         sequences (or null if no cds are found)
    */
-  public static AlignmentI makeExonAlignment(SequenceI[] dna,
-          Set<AlignedCodonFrame> mappings)
+  public static AlignmentI makeCdsAlignment(SequenceI[] dna,
+          List<AlignedCodonFrame> mappings)
   {
-    Set<AlignedCodonFrame> newMappings = new LinkedHashSet<AlignedCodonFrame>();
-    List<SequenceI> exonSequences = new ArrayList<SequenceI>();
+    List<AlignedCodonFrame> newMappings = new ArrayList<AlignedCodonFrame>();
+    List<SequenceI> cdsSequences = new ArrayList<SequenceI>();
 
     for (SequenceI dnaSeq : dna)
     {
@@ -1325,17 +1342,17 @@ public class AlignmentUtils
       for (AlignedCodonFrame acf : seqMappings)
       {
         AlignedCodonFrame newMapping = new AlignedCodonFrame();
-        final List<SequenceI> mappedExons = makeExonSequences(ds, acf,
+        final List<SequenceI> mappedCds = makeCdsSequences(ds, acf,
                 newMapping);
-        if (!mappedExons.isEmpty())
+        if (!mappedCds.isEmpty())
         {
-          exonSequences.addAll(mappedExons);
+          cdsSequences.addAll(mappedCds);
           newMappings.add(newMapping);
         }
       }
     }
     AlignmentI al = new Alignment(
-            exonSequences.toArray(new SequenceI[exonSequences.size()]));
+            cdsSequences.toArray(new SequenceI[cdsSequences.size()]));
     al.setDataset(null);
 
     /*
@@ -1348,86 +1365,207 @@ public class AlignmentUtils
   }
 
   /**
-   * Helper method to make exon-only sequences and populate their mappings to
+   * Helper method to make cds-only sequences and populate their mappings to
    * protein products
    * <p>
    * For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein
    * then generate a sequence CCTTGA with mapping [1, 6] to the same protein
    * residues
    * <p>
-   * Typically eukaryotic dna will include exons encoding for a single peptide
+   * Typically eukaryotic dna will include cds coding for a single peptide
    * sequence i.e. return a single result. Bacterial dna may have overlapping
-   * exon mappings coding for multiple peptides so return multiple results
+   * cds mappings coding for multiple peptides so return multiple results
    * (example EMBL KF591215).
    * 
    * @param dnaSeq
    *          a dna dataset sequence
    * @param mapping
    *          containing one or more mappings of the sequence to protein
-   * @param newMapping
-   *          the new mapping to populate, from the exon-only sequences to their
+   * @param newMappings
+   *          gets dna-to-cds mappings, and mappings from the cds sequences to their
    *          mapped protein sequences
    * @return
    */
-  protected static List<SequenceI> makeExonSequences(SequenceI dnaSeq,
-          AlignedCodonFrame mapping, AlignedCodonFrame newMapping)
+  protected static List<SequenceI> makeCdsSequences(SequenceI dnaSeq,
+          AlignedCodonFrame mapping, AlignedCodonFrame newMappings)
   {
-    List<SequenceI> exonSequences = new ArrayList<SequenceI>();
+    List<SequenceI> cdsSequences = new ArrayList<SequenceI>();
     List<Mapping> seqMappings = mapping.getMappingsForSequence(dnaSeq);
-    final char[] dna = dnaSeq.getSequence();
+
     for (Mapping seqMapping : seqMappings)
     {
-      StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
+      SequenceI cds = makeCdsSequence(dnaSeq, seqMapping);
+      cdsSequences.add(cds);
 
       /*
-       * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
+       * add new mappings, from dna to cds, and from cds to peptide 
        */
-      final List<int[]> dnaExonRanges = seqMapping.getMap().getFromRanges();
-      for (int[] range : dnaExonRanges)
+      MapList dnaToCds = addCdsMappings(dnaSeq, cds, seqMapping,
+              newMappings);
+
+      /*
+       * transfer dna features, other than "CDS", whose start and end map to cds
+       */
+      transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */);
+    }
+    return cdsSequences;
+  }
+
+  /**
+   * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the
+   * feature start/end ranges, optionally omitting specified feature types.
+   * 
+   * @param fromSeq the sequence whose features are copied
+   * @param toSeq the target; copies are added to its root dataset sequence, if any
+   * @param mapping
+   *          the mapping from 'fromSeq' to 'toSeq'
+   * @param omitting feature types which are not to be copied
+   */
+  protected static void transferFeatures(SequenceI fromSeq,
+          SequenceI toSeq, MapList mapping, String... omitting)
+  {
+    SequenceI copyTo = toSeq;
+    while (copyTo.getDatasetSequence() != null)
+    {
+      copyTo = copyTo.getDatasetSequence();
+    }
+
+    SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
+    if (sfs != null)
+    {
+      for (SequenceFeature sf : sfs)
       {
-        for (int pos = range[0]; pos <= range[1]; pos++)
+        String type = sf.getType();
+        boolean omit = false;
+        for (String toOmit : omitting)
+        {
+          if (type.equals(toOmit))
+          {
+            omit = true;
+          }
+        }
+        if (omit)
+        {
+          continue;
+        }
+
+        /*
+         * locate the mapped range - null if either start or end is
+         * not mapped (no partial overlaps are calculated)
+         */
+        int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+        if (mappedTo != null)
         {
-          newSequence.append(dna[pos - 1]);
+          SequenceFeature copy = new SequenceFeature(sf);
+          copy.setBegin(Math.min(mappedTo[0], mappedTo[1]));
+          copy.setEnd(Math.max(mappedTo[0], mappedTo[1]));
+          copyTo.addSequenceFeature(copy);
         }
       }
+    }
+  }
 
-      SequenceI exon = new Sequence(dnaSeq.getName(),
-              newSequence.toString());
+  /**
+   * Creates and adds mappings
+   * <ul>
+   * <li>from cds to peptide</li>
+   * <li>from dna to cds</li>
+   * </ul>
+   * and returns the dna-to-cds mapping
+   * 
+   * @param dnaSeq the nucleotide sequence from which the cds was extracted
+   * @param cdsSeq the cds sequence; a dataset sequence is created for it here
+   * @param dnaMapping the mapping from the dna to its peptide product
+   * @param newMappings the codon frame to which both new mappings are added
+   * @return the 1:1 mapping of the dna 'from' ranges to the contiguous cds
+   */
+  protected static MapList addCdsMappings(SequenceI dnaSeq,
+          SequenceI cdsSeq,
+          Mapping dnaMapping, AlignedCodonFrame newMappings)
+  {
+    cdsSeq.createDatasetSequence();
 
-      /*
-       * Locate any xrefs to CDS database on the protein product and attach to
-       * the CDS sequence. Also add as a sub-token of the sequence name.
-       */
-      // default to "CDS" if we can't locate an actual gene id
-      String cdsAccId = FeatureProperties
-              .getCodingFeature(DBRefSource.EMBL);
-      DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(seqMapping.getTo()
-              .getDBRefs(), DBRefSource.CODINGDBS);
-      if (cdsRefs != null)
+    /*
+     * CDS to peptide is just a contiguous 3:1 mapping, with
+     * the peptide ranges taken unchanged from the dna mapping
+     */
+    List<int[]> cdsRanges = new ArrayList<int[]>();
+    cdsRanges.add(new int[] { 1, cdsSeq.getLength() });
+    MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap()
+            .getToRanges(), 3, 1);
+    newMappings.addMap(cdsSeq.getDatasetSequence(), dnaMapping.getTo(),
+            cdsToPeptide);
+
+    /*
+     * dna 'from' ranges map 1:1 to the contiguous extracted CDS 
+     */
+    MapList dnaToCds = new MapList(
+            dnaMapping.getMap().getFromRanges(), cdsRanges, 1, 1);
+    newMappings.addMap(dnaSeq, cdsSeq.getDatasetSequence(), dnaToCds);
+    return dnaToCds;
+  }
+
+  /**
+   * Makes and returns a CDS-only sequence, where the CDS regions are identified
+   * as the 'from' ranges of the mapping on the dna.
+   * 
+   * @param dnaSeq
+   *          nucleotide sequence
+   * @param seqMapping
+   *          mappings from CDS regions of nucleotide
+   * @return the extracted CDS sequence, after transferDbRefs from the product
+   */
+  protected static SequenceI makeCdsSequence(SequenceI dnaSeq,
+          Mapping seqMapping)
+  {
+    StringBuilder newSequence = new StringBuilder(dnaSeq.getLength());
+    final char[] dna = dnaSeq.getSequence();
+    int offset = dnaSeq.getStart() - 1;
+
+    /*
+     * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc }
+     */
+    final List<int[]> dnaCdsRanges = seqMapping.getMap().getFromRanges();
+    for (int[] range : dnaCdsRanges)
+    {
+      // TODO handle reverse ranges (range[1] < range[0]); they add nothing now
+      for (int pos = range[0]; pos <= range[1]; pos++)
       {
-        for (DBRefEntry cdsRef : cdsRefs)
-        {
-          exon.addDBRef(new DBRefEntry(cdsRef));
-          cdsAccId = cdsRef.getAccessionId();
-        }
+        newSequence.append(dna[pos - offset - 1]);
       }
-      exon.setName(exon.getName() + "|" + cdsAccId);
-      exon.createDatasetSequence();
+    }
 
-      /*
-       * Build new mappings - from the same protein regions, but now to
-       * contiguous exons
-       */
-      List<int[]> exonRange = new ArrayList<int[]>();
-      exonRange.add(new int[] { 1, newSequence.length() });
-      MapList map = new MapList(exonRange, seqMapping.getMap()
-              .getToRanges(), 3, 1);
-      newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map);
-      MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1);
-      newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap);
-
-      exonSequences.add(exon);
+    SequenceI cds = new Sequence(dnaSeq.getName(),
+            newSequence.toString());
+
+    transferDbRefs(seqMapping.getTo(), cds);
+
+    return cds;
+  }
+
+  /**
+   * Copies any xrefs to CDS databases from the protein product to the CDS
+   * sequence, and appends "|" + accession id to its name if not already there.
+   * 
+   * @param from the protein product whose coding database refs are copied
+   * @param to the CDS sequence which receives the refs and the name suffix
+   */
+  protected static void transferDbRefs(SequenceI from, SequenceI to)
+  {
+    String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL);
+    DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(),
+            DBRefSource.CODINGDBS);
+    if (cdsRefs != null)
+    {
+      for (DBRefEntry cdsRef : cdsRefs)
+      {
+        to.addDBRef(new DBRefEntry(cdsRef));
+        cdsAccId = cdsRef.getAccessionId();
+      }
+    }
+    if (!to.getName().contains(cdsAccId))
+    {
+      to.setName(to.getName() + "|" + cdsAccId);
     }
-    return exonSequences;
   }
 }
index a71e614..21fd08d 100644 (file)
@@ -25,6 +25,7 @@ import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.util.DBRefUtils;
@@ -88,41 +89,54 @@ public class CrossRef
   {
     String[] dbrefs = null;
     List<String> refs = new ArrayList<String>();
-    for (int s = 0; s < seqs.length; s++)
+    for (SequenceI seq : seqs)
     {
-      if (seqs[s] != null)
+      if (seq != null)
       {
-        SequenceI dss = seqs[s];
+        SequenceI dss = seq;
         while (dss.getDatasetSequence() != null)
         {
           dss = dss.getDatasetSequence();
         }
         DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs());
-        for (int r = 0; rfs != null && r < rfs.length; r++)
+        if (rfs != null)
         {
-          if (!refs.contains(rfs[r].getSource()))
+          for (DBRefEntry ref : rfs)
           {
-            refs.add(rfs[r].getSource());
+            if (!refs.contains(ref.getSource()))
+            {
+              refs.add(ref.getSource());
+            }
           }
         }
         if (dataset != null)
         {
           // search for references to this sequence's direct references.
-          DBRefEntry[] lrfs = CrossRef
-                  .findXDbRefs(!dna, seqs[s].getDBRefs());
+          DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
           List<SequenceI> rseqs = new ArrayList<SequenceI>();
-          CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,
+          CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
                   null); // don't need to specify codon frame for mapping here
           for (SequenceI rs : rseqs)
           {
-            DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); // not used??
-            for (int r = 0; rfs != null && r < rfs.length; r++)
+            DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs());
+            if (xrs != null)
             {
-              if (!refs.contains(rfs[r].getSource()))
+              for (DBRefEntry ref : xrs)
               {
-                refs.add(rfs[r].getSource());
+                if (!refs.contains(ref.getSource()))
+                {
+                  refs.add(ref.getSource());
+                }
               }
             }
+            // looks like copy and paste - change rfs to xrs?
+            // for (int r = 0; rfs != null && r < rfs.length; r++)
+            // {
+            // if (!refs.contains(rfs[r].getSource()))
+            // {
+            // refs.add(rfs[r].getSource());
+            // }
+            // }
           }
         }
       }
@@ -135,13 +149,9 @@ public class CrossRef
     return dbrefs;
   }
 
-  /*
-   * if (dna) { if (rfs[r].hasMap()) { // most likely this is a protein cross
-   * reference if (!refs.contains(rfs[r].getSource())) {
-   * refs.addElement(rfs[r].getSource()); } } }
-   */
   public static boolean hasCdnaMap(SequenceI[] seqs)
   {
+    // TODO unused - remove?
     String[] reftypes = findSequenceXrefTypes(false, seqs);
     for (int s = 0; s < reftypes.length; s++)
     {
@@ -156,6 +166,7 @@ public class CrossRef
 
   public static SequenceI[] getCdnaMap(SequenceI[] seqs)
   {
+    // TODO unused - remove?
     Vector cseqs = new Vector();
     for (int s = 0; s < seqs.length; s++)
     {
@@ -199,7 +210,9 @@ public class CrossRef
   /**
    * 
    * @param seqs
+   *          sequences whose xrefs are being retrieved
    * @param dna
+   *          true if sequences are nucleotide
    * @param source
    * @param dataset
    *          alignment to search for product sequences.
@@ -209,11 +222,10 @@ public class CrossRef
           String source, AlignmentI dataset)
   {
     List<SequenceI> rseqs = new ArrayList<SequenceI>();
-    Alignment ral = null;
-    AlignedCodonFrame cf = new AlignedCodonFrame(); // nominal width
-    for (int s = 0; s < seqs.length; s++)
+    AlignedCodonFrame cf = new AlignedCodonFrame();
+    for (SequenceI seq : seqs)
     {
-      SequenceI dss = seqs[s];
+      SequenceI dss = seq;
       while (dss.getDatasetSequence() != null)
       {
         dss = dss.getDatasetSequence();
@@ -223,7 +235,8 @@ public class CrossRef
       if ((xrfs == null || xrfs.length == 0) && dataset != null)
       {
         System.out.println("Attempting to find ds Xrefs refs.");
-        DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRefs());
+        // FIXME should be dss not seq here?
+        DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs());
         // less ambiguous would be a 'find primary dbRefEntry' method.
         // filter for desired source xref here
         found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
@@ -231,29 +244,30 @@ public class CrossRef
       }
       for (int r = 0; xrfs != null && r < xrfs.length; r++)
       {
-        if (source != null && !source.equals(xrfs[r].getSource()))
+        DBRefEntry xref = xrfs[r];
+        if (source != null && !source.equals(xref.getSource()))
         {
           continue;
         }
-        if (xrfs[r].hasMap())
+        if (xref.hasMap())
         {
-          if (xrfs[r].getMap().getTo() != null)
+          if (xref.getMap().getTo() != null)
           {
-            SequenceI rsq = new Sequence(xrfs[r].getMap().getTo());
+            SequenceI rsq = new Sequence(xref.getMap().getTo());
             rseqs.add(rsq);
-            if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r]
+            if (xref.getMap().getMap().getFromRatio() != xref
                     .getMap().getMap().getToRatio())
             {
               // get sense of map correct for adding to product alignment.
               if (dna)
               {
                 // map is from dna seq to a protein product
-                cf.addMap(dss, rsq, xrfs[r].getMap().getMap());
+                cf.addMap(dss, rsq, xref.getMap().getMap());
               }
               else
               {
                 // map should be from protein seq to its coding dna
-                cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse());
+                cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
               }
             }
             found = true;
@@ -265,7 +279,7 @@ public class CrossRef
           // xrefs on this sequence.
           if (dataset != null)
           {
-            found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); // ,false,!dna);
+            found |= searchDataset(dss, xref, dataset, rseqs, cf); // ,false,!dna);
             if (found)
             {
               xrfs[r] = null; // we've recovered seqs for this one.
@@ -313,15 +327,14 @@ public class CrossRef
             xrfs = t;
             try
             {
-              retrieved = sftch.getSequences(xrfs); // problem here is we don't
-              // know which of xrfs
-              // resulted in which
+              retrieved = sftch.getSequences(xrfs, !dna);
+              // problem here is we don't know which of xrfs resulted in which
               // retrieved element
             } catch (Exception e)
             {
               System.err
                       .println("Problem whilst retrieving cross references for Sequence : "
-                              + seqs[s].getName());
+                              + seq.getName());
               e.printStackTrace();
             }
             if (retrieved != null)
@@ -329,15 +342,14 @@ public class CrossRef
               for (int rs = 0; rs < retrieved.length; rs++)
               {
                 // TODO: examine each sequence for 'redundancy'
-                jalview.datamodel.DBRefEntry[] dbr = retrieved[rs]
-                        .getDBRefs();
+                DBRefEntry[] dbr = retrieved[rs].getDBRefs();
                 if (dbr != null && dbr.length > 0)
                 {
                   for (int di = 0; di < dbr.length; di++)
                   {
                     // find any entry where we should put in the sequence being
                     // cross-referenced into the map
-                    jalview.datamodel.Mapping map = dbr[di].getMap();
+                    Mapping map = dbr[di].getMap();
                     if (map != null)
                     {
                       if (map.getTo() != null && map.getMap() != null)
@@ -382,12 +394,14 @@ public class CrossRef
         }
       }
     }
+
+    Alignment ral = null;
     if (rseqs.size() > 0)
     {
       SequenceI[] rsqs = new SequenceI[rseqs.size()];
       rseqs.toArray(rsqs);
       ral = new Alignment(rsqs);
-      if (cf != null && cf.getProtMappings() != null)
+      if (cf != null && !cf.isEmpty())
       {
         ral.addCodonFrame(cf);
       }
index 2939e3c..be138f3 100644 (file)
@@ -806,4 +806,154 @@ public class Dna
       }
     }
   }
+
+  /**
+   * Returns an alignment consisting of the reversed (and optionally
+   * complemented) sequences set in this object's constructor
+   * 
+   * @param complement if true, sequences are complemented as well as reversed
+   * @return a new alignment, for which a dataset alignment has been created
+   */
+  public AlignmentI reverseCdna(boolean complement)
+  {
+    int sSize = selection.size();
+    List<SequenceI> reversed = new ArrayList<SequenceI>();
+    for (int s = 0; s < sSize; s++)
+    {
+      SequenceI newseq = reverseSequence(selection.get(s).getName(),
+              seqstring[s], complement);
+
+      if (newseq != null)
+      {
+        reversed.add(newseq);
+      }
+    }
+
+    SequenceI[] newseqs = reversed.toArray(new SequenceI[reversed.size()]);
+    AlignmentI al = new Alignment(newseqs);
+    ((Alignment) al).createDatasetAlignment();
+    return al;
+  }
+
+  /**
+   * Returns a reversed, and optionally complemented, sequence. The new
+   * sequence's name is the original name with "|rev" or "|revcomp" appended.
+   * aAcCgGtTuU and DNA ambiguity codes are complemented; others are unchanged.
+   * 
+   * @param seqName name of the original sequence
+   * @param sequence the characters to be reversed
+   * @param complement if true, each character is also complemented
+   * @return the new sequence, named as described above
+   */
+  public static SequenceI reverseSequence(String seqName, String sequence,
+          boolean complement)
+  {
+    String newName = seqName + "|rev" + (complement ? "comp" : "");
+    char[] originalSequence = sequence.toCharArray();
+    int length = originalSequence.length;
+    char[] reversedSequence = new char[length];
+
+    for (int i = 0; i < length; i++)
+    {
+      reversedSequence[length - i - 1] = complement ? getComplement(originalSequence[i])
+              : originalSequence[i];
+    }
+    SequenceI reversed = new Sequence(newName, reversedSequence, 1, length);
+    return reversed;
+  }
+
+  /**
+   * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes
+   * are treated as on http://reverse-complement.com/. Anything else is left
+   * unchanged.
+   * 
+   * @param c the base or ambiguity code (r/y, k/m, b/v and d/h are swapped)
+   * @return the complement (u/U give a/A), or c itself if it is not handled
+   */
+  public static char getComplement(char c)
+  {
+    char result = c;
+    switch (c) {
+    case 'a':
+      result = 't';
+      break;
+    case 'A':
+      result = 'T';
+      break;
+    case 'c':
+      result = 'g';
+      break;
+    case 'C':
+      result = 'G';
+      break;
+    case 'g':
+      result = 'c';
+      break;
+    case 'G':
+      result = 'C';
+      break;
+    case 't':
+      result = 'a';
+      break;
+    case 'T':
+      result = 'A';
+      break;
+    case 'u':
+      result = 'a';
+      break;
+    case 'U':
+      result = 'A';
+      break;
+    case 'r':
+      result = 'y';
+      break;
+    case 'R':
+      result = 'Y';
+      break;
+    case 'y':
+      result = 'r';
+      break;
+    case 'Y':
+      result = 'R';
+      break;
+    case 'k':
+      result = 'm';
+      break;
+    case 'K':
+      result = 'M';
+      break;
+    case 'm':
+      result = 'k';
+      break;
+    case 'M':
+      result = 'K';
+      break;
+    case 'b':
+      result = 'v';
+      break;
+    case 'B':
+      result = 'V';
+      break;
+    case 'v':
+      result = 'b';
+      break;
+    case 'V':
+      result = 'B';
+      break;
+    case 'd':
+      result = 'h';
+      break;
+    case 'D':
+      result = 'H';
+      break;
+    case 'h':
+      result = 'd';
+      break;
+    case 'H':
+      result = 'D';
+      break;
+    }
+
+    return result;
+  }
 }
index 454bc09..23e9a14 100755 (executable)
@@ -304,7 +304,7 @@ public class SequenceIdMatcher
       }
       if (s instanceof SeqIdName)
       {
-        return this.equals((SeqIdName) s);
+        return this.equals(((SeqIdName) s).id);
       }
       else
       {
@@ -332,25 +332,8 @@ public class SequenceIdMatcher
      * todo: (JBPNote) Set separator characters appropriately
      * 
      * @param s
-     *          SeqIdName
      * @return boolean
      */
-    public boolean equals(SeqIdName s)
-    {
-      // TODO: JAL-732 patch for cases when name includes a list of IDs, and the
-      // match contains one ID flanked
-      if (id.length() > s.id.length())
-      {
-        return id.startsWith(s.id) ? (WORD_SEP.indexOf(id.charAt(s.id
-                .length())) > -1) : false;
-      }
-      else
-      {
-        return s.id.startsWith(id) ? (s.id.equals(id) ? true : (WORD_SEP
-                .indexOf(s.id.charAt(id.length())) > -1)) : false;
-      }
-    }
-
     public boolean equals(String s)
     {
       if (id.length() > s.length())
diff --git a/src/jalview/api/FeaturesSourceI.java b/src/jalview/api/FeaturesSourceI.java
new file mode 100644 (file)
index 0000000..8f8d8c1
--- /dev/null
@@ -0,0 +1,8 @@
+package jalview.api;
+
+/**
+ * A tagging interface (declaring no methods) marking a source of sequence features
+ */
+public interface FeaturesSourceI
+{
+}
index b7e7899..e5f0053 100644 (file)
@@ -364,18 +364,15 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener,
   public boolean parseFeaturesFile(String file, String type,
           boolean autoenabledisplay)
   {
-    // TODO: test if importing a features file onto an alignment which already
-    // has features with links overwrites the original links.
-
-    Hashtable featureLinks = new Hashtable();
     boolean featuresFile = false;
     try
     {
-      featuresFile = new jalview.io.FeaturesFile(file, type).parse(viewport
-              .getAlignment(), alignPanel.seqPanel.seqCanvas
-              .getFeatureRenderer().getFeatureColours(), featureLinks,
-              true, viewport.applet.getDefaultParameter("relaxedidmatch",
-                      false));
+      Map<String, Object> colours = alignPanel.seqPanel.seqCanvas
+              .getFeatureRenderer().getFeatureColours();
+      boolean relaxedIdMatching = viewport.applet.getDefaultParameter(
+              "relaxedidmatch", false);
+      featuresFile = new FeaturesFile(file, type).parse(
+              viewport.getAlignment(), colours, true, relaxedIdMatching);
     } catch (Exception ex)
     {
       ex.printStackTrace();
@@ -383,10 +380,6 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener,
 
     if (featuresFile)
     {
-      if (featureLinks.size() > 0)
-      {
-        alignPanel.seqPanel.seqCanvas.getFeatureRenderer().featureLinks = featureLinks;
-      }
       if (autoenabledisplay)
       {
         viewport.setShowSequenceFeatures(true);
@@ -1420,15 +1413,16 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener,
   public String outputFeatures(boolean displayTextbox, String format)
   {
     String features;
+    FeaturesFile formatter = new FeaturesFile();
     if (format.equalsIgnoreCase("Jalview"))
     {
-      features = new FeaturesFile().printJalviewFormat(viewport
+      features = formatter.printJalviewFormat(viewport
               .getAlignment().getSequencesArray(),
               getDisplayedFeatureCols());
     }
     else
     {
-      features = new FeaturesFile().printGFFFormat(viewport.getAlignment()
+      features = formatter.printGffFormat(viewport.getAlignment()
               .getSequencesArray(), getDisplayedFeatureCols());
     }
 
index 70a7319..bbaeb68 100644 (file)
@@ -22,6 +22,7 @@ package jalview.appletgui;
 
 import jalview.analysis.AlignmentUtils;
 import jalview.api.ComplexAlignFile;
+import jalview.api.FeaturesSourceI;
 import jalview.bin.JalviewLite;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.ColumnSelection;
@@ -116,6 +117,7 @@ public class CutAndPasteTransfer extends Panel implements ActionListener,
     addSequences.setVisible(false);
   }
 
+  @Override
   public void actionPerformed(ActionEvent evt)
   {
     if (evt.getSource() == accept)
@@ -223,7 +225,7 @@ public class CutAndPasteTransfer extends Panel implements ActionListener,
   {
     AlignmentI al = null;
 
-    String format = new IdentifyFile().Identify(text,
+    String format = new IdentifyFile().identify(text,
             AppletFormatAdapter.PASTE);
     AppletFormatAdapter afa = new AppletFormatAdapter(alignFrame.alignPanel);
     try
@@ -277,6 +279,10 @@ public class CutAndPasteTransfer extends Panel implements ActionListener,
         {
           af = new AlignFrame(al, alignFrame.viewport.applet,
                   "Cut & Paste input - " + format, false);
+          if (source instanceof FeaturesSourceI)
+          {
+            af.getAlignViewport().setShowSequenceFeatures(true);
+          }
         }
 
         af.statusBar
@@ -490,6 +496,7 @@ public class CutAndPasteTransfer extends Panel implements ActionListener,
     this.add(textarea, java.awt.BorderLayout.CENTER);
   }
 
+  @Override
   public void mousePressed(MouseEvent evt)
   {
     if (textarea.getText().startsWith(
@@ -499,18 +506,22 @@ public class CutAndPasteTransfer extends Panel implements ActionListener,
     }
   }
 
+  @Override
   public void mouseReleased(MouseEvent evt)
   {
   }
 
+  @Override
   public void mouseClicked(MouseEvent evt)
   {
   }
 
+  @Override
   public void mouseEntered(MouseEvent evt)
   {
   }
 
+  @Override
   public void mouseExited(MouseEvent evt)
   {
   }
index 4655ba5..4391fa2 100644 (file)
@@ -43,7 +43,6 @@ import java.awt.TextArea;
 import java.awt.TextField;
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
-import java.util.Hashtable;
 
 /**
  * DOCUMENT ME!
@@ -54,11 +53,6 @@ import java.util.Hashtable;
 public class FeatureRenderer extends
         jalview.renderer.seqfeatures.FeatureRenderer
 {
-
-  // Holds web links for feature groups and feature types
-  // in the form label|link
-  Hashtable featureLinks = null;
-
   /**
    * Creates a new FeatureRenderer object.
    * 
@@ -154,6 +148,7 @@ public class FeatureRenderer extends
       super(null);
     }
 
+    @Override
     public void paint(Graphics g)
     {
       Dimension d = getSize();
@@ -227,6 +222,7 @@ public class FeatureRenderer extends
 
       overlaps.addItemListener(new java.awt.event.ItemListener()
       {
+        @Override
         public void itemStateChanged(java.awt.event.ItemEvent e)
         {
           int index = overlaps.getSelectedIndex();
@@ -344,6 +340,7 @@ public class FeatureRenderer extends
       dialog.buttonPanel.add(deleteButton, 1);
       deleteButton.addActionListener(new ActionListener()
       {
+        @Override
         public void actionPerformed(ActionEvent evt)
         {
           deleteFeature = true;
@@ -370,6 +367,7 @@ public class FeatureRenderer extends
     // TODO: render the graduated color in the box.
     colourPanel.addMouseListener(new java.awt.event.MouseAdapter()
     {
+      @Override
       public void mousePressed(java.awt.event.MouseEvent evt)
       {
         if (!colourPanel.isGcol)
index 1c156dc..23ca5ae 100755 (executable)
@@ -165,6 +165,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     final FeatureSettings me = this;
     frame.addWindowListener(new WindowAdapter()
     {
+      @Override
       public void windowClosing(WindowEvent e)
       {
         if (me.av.featureSettings == me)
@@ -185,6 +186,7 @@ public class FeatureSettings extends Panel implements ItemListener,
             width, height);
   }
 
+  @Override
   public void paint(Graphics g)
   {
     g.setColor(Color.black);
@@ -212,6 +214,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     scr.addActionListener(new ActionListener()
     {
 
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         me.ap.alignFrame.avc
@@ -224,6 +227,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     dens.addActionListener(new ActionListener()
     {
 
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         me.ap.alignFrame.avc
@@ -258,6 +262,7 @@ public class FeatureSettings extends Panel implements ItemListener,
         mxcol.addActionListener(new ActionListener()
         {
 
+          @Override
           public void actionPerformed(ActionEvent e)
           {
             if (typeCol instanceof Color)
@@ -312,9 +317,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     for (String group : fr.getFeatureGroups())
     {
       boolean vis = fr.checkGroupVisibility(group, false);
-      Checkbox check = new MyCheckbox(group, vis,
-              (fr.featureLinks != null && fr.featureLinks
-                      .containsKey(group)));
+      Checkbox check = new MyCheckbox(group, vis, false);
       check.addMouseListener(this);
       check.setFont(new Font("Serif", Font.BOLD, 12));
       check.addItemListener(groupItemListener);
@@ -452,10 +455,7 @@ public class FeatureSettings extends Panel implements ItemListener,
         selected = true;
       }
 
-      check = new MyCheckbox(
-              type,
-              selected,
-              (fr.featureLinks != null && fr.featureLinks.containsKey(type)),
+      check = new MyCheckbox(type, selected, false,
               fr.getFeatureStyle(type));
 
       check.addMouseListener(this);
@@ -474,6 +474,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     }
   }
 
+  @Override
   public void actionPerformed(ActionEvent evt)
   {
     for (int i = 0; i < featurePanel.getComponentCount(); i++)
@@ -486,6 +487,7 @@ public class FeatureSettings extends Panel implements ItemListener,
 
   private ItemListener groupItemListener = new ItemListener()
   {
+    @Override
     public void itemStateChanged(ItemEvent evt)
     {
       Checkbox source = (Checkbox) evt.getSource();
@@ -500,6 +502,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     };
   };
 
+  @Override
   public void itemStateChanged(ItemEvent evt)
   {
     selectionChanged();
@@ -533,22 +536,7 @@ public class FeatureSettings extends Panel implements ItemListener,
 
   boolean dragging = false;
 
-  public void mousePressed(MouseEvent evt)
-  {
-
-    selectedCheck = (MyCheckbox) evt.getSource();
-
-    if (fr.featureLinks != null
-            && fr.featureLinks.containsKey(selectedCheck.type))
-    {
-      if (evt.getX() > selectedCheck.stringWidth + 20)
-      {
-        evt.consume();
-      }
-    }
-
-  }
-
+  @Override
   public void mouseDragged(MouseEvent evt)
   {
     if (((Component) evt.getSource()).getParent() != featurePanel)
@@ -558,6 +546,7 @@ public class FeatureSettings extends Panel implements ItemListener,
     dragging = true;
   }
 
+  @Override
   public void mouseReleased(MouseEvent evt)
   {
     if (((Component) evt.getSource()).getParent() != featurePanel)
@@ -633,14 +622,17 @@ public class FeatureSettings extends Panel implements ItemListener,
     ap.paintAlignment(true);
   }
 
+  @Override
   public void mouseEntered(MouseEvent evt)
   {
   }
 
+  @Override
   public void mouseExited(MouseEvent evt)
   {
   }
 
+  @Override
   public void mouseClicked(MouseEvent evt)
   {
     MyCheckbox check = (MyCheckbox) evt.getSource();
@@ -648,16 +640,6 @@ public class FeatureSettings extends Panel implements ItemListener,
     {
       this.popupSort(check, fr.getMinMax(), evt.getX(), evt.getY());
     }
-    if (fr.featureLinks != null && fr.featureLinks.containsKey(check.type))
-    {
-      if (evt.getX() > check.stringWidth + 20)
-      {
-        evt.consume();
-        String link = fr.featureLinks.get(check.type).toString();
-        ap.alignFrame.showURL(link.substring(link.indexOf("|") + 1),
-                link.substring(0, link.indexOf("|")));
-      }
-    }
 
     if (check.getParent() != featurePanel)
     {
@@ -680,10 +662,12 @@ public class FeatureSettings extends Panel implements ItemListener,
     }
   }
 
+  @Override
   public void mouseMoved(MouseEvent evt)
   {
   }
 
+  @Override
   public void adjustmentValueChanged(AdjustmentEvent evt)
   {
     fr.setTransparency((100 - transparency.getValue()) / 100f);
@@ -764,6 +748,7 @@ public class FeatureSettings extends Panel implements ItemListener,
       updateColor(featureStyle);
     }
 
+    @Override
     public void paint(Graphics g)
     {
       Dimension d = getSize();
@@ -802,4 +787,9 @@ public class FeatureSettings extends Panel implements ItemListener,
     }
   }
 
+  @Override
+  public void mousePressed(MouseEvent e)
+  { // empty: no action is taken on mouse press
+  }
+
 }
index 462f5a7..8fe3bca 100755 (executable)
@@ -377,7 +377,7 @@ public class Jalview
 
       protocol = jalview.io.AppletFormatAdapter.checkProtocol(file);
 
-      format = new jalview.io.IdentifyFile().Identify(file, protocol);
+      format = new jalview.io.IdentifyFile().identify(file, protocol);
 
       AlignFrame af = fileLoader.LoadFileWaitTillLoaded(file, protocol,
               format);
@@ -627,7 +627,7 @@ public class Jalview
       }
       else
       {
-        format = new jalview.io.IdentifyFile().Identify(file, protocol);
+        format = new jalview.io.IdentifyFile().identify(file, protocol);
       }
 
       startUpAlframe = fileLoader.LoadFileWaitTillLoaded(file, protocol,
index 36a7cff..ae84ba5 100644 (file)
@@ -850,7 +850,7 @@ public class JalviewLite extends Applet implements
   {
     AlignmentI al = null;
 
-    String format = new IdentifyFile().Identify(text,
+    String format = new IdentifyFile().identify(text,
             AppletFormatAdapter.PASTE);
     try
     {
@@ -1967,7 +1967,7 @@ public class JalviewLite extends Applet implements
         return null;
       }
       String resolvedFile = resolveFileProtocol(fileParam);
-      String format = new IdentifyFile().Identify(resolvedFile, protocol);
+      String format = new IdentifyFile().identify(resolvedFile, protocol);
       dbgMsg("File identified as '" + format + "'");
       AlignmentI al = null;
       try
index 6be1016..fd88028 100644 (file)
@@ -113,7 +113,7 @@ public class JalviewLiteURLRetrieve extends Applet
       String format = getParameter("format");
       if (format == null || format.length() == 0)
       {
-        format = new jalview.io.IdentifyFile().Identify(file, protocol);
+        format = new jalview.io.IdentifyFile().identify(file, protocol);
         System.out.println("Format is " + format);
       }
       else
index 06b1e4c..151af0e 100644 (file)
@@ -393,7 +393,7 @@ public class AlignViewController implements AlignViewControllerI
     boolean featuresFile = false;
     try
     {
-      featuresFile = new FeaturesFile(file, protocol).parse(viewport
+      featuresFile = new FeaturesFile(false, file, protocol).parse(viewport
               .getAlignment().getDataset(), alignPanel.getFeatureRenderer()
               .getFeatureColours(), false, relaxedIdMatching);
     } catch (Exception ex)
index 9c642cf..5dfd434 100644 (file)
@@ -33,23 +33,40 @@ import java.util.List;
 public class AlignedCodonFrame
 {
 
-  /**
-   * tied array of na Sequence objects.
+  /*
+   * Data bean to hold mappings from one sequence to another
    */
-  private SequenceI[] dnaSeqs = null;
+  private class SequenceToSequenceMapping
+  {
+    private SequenceI fromSeq;
 
-  /**
-   * tied array of Mappings to protein sequence Objects and SequenceI[]
-   * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
-   * element to corresponding aaSeqs element
-   */
-  private Mapping[] dnaToProt = null;
+    private Mapping mapping;
+
+    SequenceToSequenceMapping(SequenceI from, Mapping map)
+    {
+      this.fromSeq = from;
+      this.mapping = map;
+    }
+
+    /**
+     * Readable representation for debugging only, not guaranteed not to change
+     */
+    @Override
+    public String toString()
+    {
+      return String.format("From %s %s", fromSeq.getName(),
+              mapping.toString());
+    }
+  }
+
+  private List<SequenceToSequenceMapping> mappings;
 
   /**
    * Constructor
    */
   public AlignedCodonFrame()
   {
+    mappings = new ArrayList<SequenceToSequenceMapping>();
   }
 
   /**
@@ -62,68 +79,75 @@ public class AlignedCodonFrame
    */
   public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
   {
-    int nlen = 1;
-    if (dnaSeqs != null)
-    {
-      nlen = dnaSeqs.length + 1;
-    }
-    SequenceI[] ndna = new SequenceI[nlen];
-    Mapping[] ndtp = new Mapping[nlen];
-    if (dnaSeqs != null)
-    {
-      System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
-      System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
-    }
-    dnaSeqs = ndna;
-    dnaToProt = ndtp;
-    nlen--;
-    dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
-            .getDatasetSequence();
-    Mapping mp = new Mapping(map);
     // JBPNote DEBUG! THIS !
     // dnaseq.transferAnnotation(aaseq, mp);
     // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
-    mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
+
+    SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq
+            : dnaseq.getDatasetSequence();
+    SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
             .getDatasetSequence();
-    dnaToProt[nlen] = mp;
+
+    /*
+     * if we already hold a mapping between these sequences, just add to it 
+     */
+    for (SequenceToSequenceMapping ssm : mappings)
+    {
+      if (ssm.fromSeq == fromSeq && ssm.mapping.to == toSeq)
+      {
+        ssm.mapping.map.addMapList(map);
+        return;
+      }
+    }
+
+    /*
+     * otherwise, add a new sequence mapping
+     */
+    Mapping mp = new Mapping(toSeq, map);
+    mappings.add(new SequenceToSequenceMapping(fromSeq, mp));
   }
 
   public SequenceI[] getdnaSeqs()
   {
-    return dnaSeqs;
+    // TODO return a list instead?
+    // return dnaSeqs;
+    List<SequenceI> seqs = new ArrayList<SequenceI>();
+    for (SequenceToSequenceMapping ssm : mappings)
+    {
+      seqs.add(ssm.fromSeq);
+    }
+    return seqs.toArray(new SequenceI[seqs.size()]);
   }
 
   public SequenceI[] getAaSeqs()
   {
-    if (dnaToProt == null)
+    // TODO not used - remove?
+    List<SequenceI> seqs = new ArrayList<SequenceI>();
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      return null;
-    }
-    SequenceI[] sqs = new SequenceI[dnaToProt.length];
-    for (int sz = 0; sz < dnaToProt.length; sz++)
-    {
-      sqs[sz] = dnaToProt[sz].to;
+      seqs.add(ssm.mapping.to);
     }
-    return sqs;
+    return seqs.toArray(new SequenceI[seqs.size()]);
   }
 
   public MapList[] getdnaToProt()
   {
-    if (dnaToProt == null)
-    {
-      return null;
-    }
-    MapList[] sqs = new MapList[dnaToProt.length];
-    for (int sz = 0; sz < dnaToProt.length; sz++)
+    List<MapList> maps = new ArrayList<MapList>();
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      sqs[sz] = dnaToProt[sz].map;
+      maps.add(ssm.mapping.map);
     }
-    return sqs;
+    return maps.toArray(new MapList[maps.size()]);
   }
 
   public Mapping[] getProtMappings()
   {
-    return dnaToProt;
+    List<Mapping> maps = new ArrayList<Mapping>();
+    for (SequenceToSequenceMapping ssm : mappings)
+    {
+      maps.add(ssm.mapping);
+    }
+    return maps.toArray(new Mapping[maps.size()]);
   }
 
   /**
@@ -135,18 +159,14 @@ public class AlignedCodonFrame
    */
   public Mapping getMappingForSequence(SequenceI seq)
   {
-    if (dnaSeqs == null)
-    {
-      return null;
-    }
     SequenceI seqDs = seq.getDatasetSequence();
     seqDs = seqDs != null ? seqDs : seq;
 
-    for (int ds = 0; ds < dnaSeqs.length; ds++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
+      if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs)
       {
-        return dnaToProt[ds];
+        return ssm.mapping;
       }
     }
     return null;
@@ -161,16 +181,12 @@ public class AlignedCodonFrame
    */
   public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
   {
-    if (dnaSeqs == null)
-    {
-      return null;
-    }
     SequenceI dnads = dnaSeqRef.getDatasetSequence();
-    for (int ds = 0; ds < dnaSeqs.length; ds++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
+      if (ssm.fromSeq == dnaSeqRef || ssm.fromSeq == dnads)
       {
-        return dnaToProt[ds].to;
+        return ssm.mapping.to;
       }
     }
     return null;
@@ -183,16 +199,12 @@ public class AlignedCodonFrame
    */
   public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
   {
-    if (dnaToProt == null)
-    {
-      return null;
-    }
     SequenceI aads = aaSeqRef.getDatasetSequence();
-    for (int as = 0; as < dnaToProt.length; as++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
+      if (ssm.mapping.to == aaSeqRef || ssm.mapping.to == aads)
       {
-        return dnaSeqs[as];
+        return ssm.fromSeq;
       }
     }
     return null;
@@ -224,36 +236,30 @@ public class AlignedCodonFrame
   public void markMappedRegion(SequenceI seq, int index,
           SearchResults results)
   {
-    if (dnaToProt == null)
-    {
-      return;
-    }
     int[] codon;
     SequenceI ds = seq.getDatasetSequence();
-    for (int mi = 0; mi < dnaToProt.length; mi++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
+      if (ssm.fromSeq == seq || ssm.fromSeq == ds)
       {
-        // DEBUG System.err.println("dna pos "+index);
-        codon = dnaToProt[mi].map.locateInTo(index, index);
+        codon = ssm.mapping.map.locateInTo(index, index);
         if (codon != null)
         {
           for (int i = 0; i < codon.length; i += 2)
           {
-            results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
+            results.addResult(ssm.mapping.to, codon[i], codon[i + 1]);
           }
         }
       }
-      else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
+      else if (ssm.mapping.to == seq || ssm.mapping.to == ds)
       {
-        // DEBUG System.err.println("aa pos "+index);
         {
-          codon = dnaToProt[mi].map.locateInFrom(index, index);
+          codon = ssm.mapping.map.locateInFrom(index, index);
           if (codon != null)
           {
             for (int i = 0; i < codon.length; i += 2)
             {
-              results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
+              results.addResult(ssm.fromSeq, codon[i], codon[i + 1]);
             }
           }
         }
@@ -282,13 +288,15 @@ public class AlignedCodonFrame
      * Adapted from markMappedRegion().
      */
     MapList ml = null;
-    for (int i = 0; i < dnaToProt.length; i++)
+    int i = 0;
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaSeqs[i] == seq)
+      if (ssm.fromSeq == seq)
       {
         ml = getdnaToProt()[i];
         break;
       }
+      i++;
     }
     return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
   }
@@ -307,18 +315,15 @@ public class AlignedCodonFrame
     /*
      * Search mapped protein ('to') sequences first.
      */
-    if (this.dnaToProt != null)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      for (int i = 0; i < dnaToProt.length; i++)
+      if (ssm.fromSeq == seq)
       {
-        if (this.dnaSeqs[i] == seq)
+        for (SequenceI sourceAligned : al.getSequences())
         {
-          for (SequenceI sourceAligned : al.getSequences())
+          if (ssm.mapping.to == sourceAligned.getDatasetSequence())
           {
-            if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
-            {
-              return sourceAligned;
-            }
+            return sourceAligned;
           }
         }
       }
@@ -327,18 +332,15 @@ public class AlignedCodonFrame
     /*
      * Then try mapped dna sequences.
      */
-    if (this.dnaToProt != null)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      for (int i = 0; i < dnaToProt.length; i++)
+      if (ssm.mapping.to == seq)
       {
-        if (this.dnaToProt[i].to == seq)
+        for (SequenceI sourceAligned : al.getSequences())
         {
-          for (SequenceI sourceAligned : al.getSequences())
+          if (ssm.fromSeq == sourceAligned.getDatasetSequence())
           {
-            if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
-            {
-              return sourceAligned;
-            }
+            return sourceAligned;
           }
         }
       }
@@ -348,31 +350,45 @@ public class AlignedCodonFrame
   }
 
   /**
-   * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
-   * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
-   * a set of start/end position pairs.
+   * Returns the region in the target sequence's dataset that is mapped to the
+   * given position (base 1) in the query sequence's dataset. The region is a
+   * set of start/end position pairs.
    * 
-   * @param mappedFrom
-   * @param mappedTo
-   * @param pos
+   * @param target
+   * @param query
+   * @param queryPos
    * @return
    */
-  public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
-          int pos)
+  public int[] getMappedRegion(SequenceI target, SequenceI query,
+          int queryPos)
   {
-    SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom
-            : mappedFrom.getDatasetSequence();
-    SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo
-            : mappedTo.getDatasetSequence();
-    if (targetDs == null || sourceDs == null || dnaToProt == null)
+    SequenceI targetDs = target.getDatasetSequence() == null ? target
+            : target.getDatasetSequence();
+    SequenceI queryDs = query.getDatasetSequence() == null ? query : query
+            .getDatasetSequence();
+    if (targetDs == null || queryDs == null /*|| dnaToProt == null*/)
     {
       return null;
     }
-    for (int mi = 0; mi < dnaToProt.length; mi++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs)
+      /*
+       * try mapping from target to query
+       */
+      if (ssm.fromSeq == targetDs && ssm.mapping.to == queryDs)
+      {
+        int[] codon = ssm.mapping.map.locateInFrom(queryPos, queryPos);
+        if (codon != null)
+        {
+          return codon;
+        }
+      }
+      /*
+       * else try mapping from query to target
+       */
+      else if (ssm.fromSeq == queryDs && ssm.mapping.to == targetDs)
       {
-        int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
+        int[] codon = ssm.mapping.map.locateInTo(queryPos, queryPos);
         if (codon != null)
         {
           return codon;
@@ -383,8 +399,10 @@ public class AlignedCodonFrame
   }
 
   /**
-   * Returns the DNA codon for the given position (base 1) in a mapped protein
-   * sequence, or null if no mapping is found.
+   * Returns the mapped DNA codons for the given position in a protein sequence,
+   * or null if no mapping is found. Returns a list of (e.g.) ['g', 'c', 't']
+   * codons. There may be more than one codon mapped to the protein if (for
+   * example), there are mappings to cDNA variants.
    * 
    * @param protein
    *          the peptide dataset sequence
@@ -392,41 +410,36 @@ public class AlignedCodonFrame
    *          residue position (base 1) in the peptide sequence
    * @return
    */
-  public char[] getMappedCodon(SequenceI protein, int aaPos)
+  public List<char[]> getMappedCodons(SequenceI protein, int aaPos)
   {
-    if (dnaToProt == null)
-    {
-      return null;
-    }
     MapList ml = null;
     SequenceI dnaSeq = null;
-    for (int i = 0; i < dnaToProt.length; i++)
+    List<char[]> result = new ArrayList<char[]>();
+
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      if (dnaToProt[i].to == protein)
+      if (ssm.mapping.to == protein)
       {
-        ml = getdnaToProt()[i];
-        dnaSeq = dnaSeqs[i];
-        break;
+        ml = ssm.mapping.map;
+        dnaSeq = ssm.fromSeq;
+
+        int[] codonPos = ml.locateInFrom(aaPos, aaPos);
+        if (codonPos == null)
+        {
+          return null;
+        }
+
+        /*
+         * Read off the mapped nucleotides (converting to position base 0)
+         */
+        codonPos = MappingUtils.flattenRanges(codonPos);
+        char[] dna = dnaSeq.getSequence();
+        int start = dnaSeq.getStart();
+        result.add(new char[] { dna[codonPos[0] - start],
+            dna[codonPos[1] - start], dna[codonPos[2] - start] });
       }
     }
-    if (ml == null)
-    {
-      return null;
-    }
-    int[] codonPos = ml.locateInFrom(aaPos, aaPos);
-    if (codonPos == null)
-    {
-      return null;
-    }
-
-    /*
-     * Read off the mapped nucleotides (converting to position base 0)
-     */
-    codonPos = MappingUtils.flattenRanges(codonPos);
-    char[] dna = dnaSeq.getSequence();
-    int start = dnaSeq.getStart();
-    return new char[] { dna[codonPos[0] - start], dna[codonPos[1] - start],
-        dna[codonPos[2] - start] };
+    return result.isEmpty() ? null : result;
   }
 
   /**
@@ -439,18 +452,14 @@ public class AlignedCodonFrame
   public List<Mapping> getMappingsForSequence(SequenceI seq)
   {
     List<Mapping> result = new ArrayList<Mapping>();
-    if (dnaSeqs == null)
-    {
-      return result;
-    }
     List<SequenceI> related = new ArrayList<SequenceI>();
     SequenceI seqDs = seq.getDatasetSequence();
     seqDs = seqDs != null ? seqDs : seq;
 
-    for (int ds = 0; ds < dnaSeqs.length; ds++)
+    for (SequenceToSequenceMapping ssm : mappings)
     {
-      final Mapping mapping = dnaToProt[ds];
-      if (dnaSeqs[ds] == seqDs || mapping.to == seqDs)
+      final Mapping mapping = ssm.mapping;
+      if (ssm.fromSeq == seqDs || mapping.to == seqDs)
       {
         if (!related.contains(mapping.to))
         {
@@ -461,4 +470,171 @@ public class AlignedCodonFrame
     }
     return result;
   }
+
+  /**
+   * Tests whether the given sequence is substitutable for one or more dummy
+   * sequences in this mapping. Only counts candidates; nothing is replaced.
+   * 
+   * @param seq
+   *          the candidate real sequence
+   * @return true if at least one mapped dummy sequence could be replaced
+   */
+  public boolean isRealisableWith(SequenceI seq)
+  {
+    return realiseWith(seq, false) > 0;
+  }
+
+  /**
+   * Replaces any matchable mapped dummy sequences with the given real one.
+   * 
+   * @param seq
+   *          the real sequence to substitute for matching dummy sequences
+   * @return the count of sequence mappings instantiated
+   */
+  public int realiseWith(SequenceI seq)
+  {
+    return realiseWith(seq, true);
+  }
+
+  /**
+   * Returns the number of mapped dummy sequences ('from' or 'to') that could
+   * be replaced with the given real sequence, optionally replacing them.
+   * 
+   * @param seq
+   *          a sequence (its dataset sequence is used if it has one)
+   * @param doUpdate
+   *          if true, performs replacements, else only counts
+   * @return the count of replaceable (or replaced) mapped sequences
+   */
+  protected int realiseWith(SequenceI seq, boolean doUpdate)
+  {
+    SequenceI ds = seq.getDatasetSequence() != null ? seq
+            .getDatasetSequence() : seq;
+    int count = 0;
+
+    /*
+     * for each mapping, first check its DNA ('map from') sequence
+     */
+    for (SequenceToSequenceMapping ssm : mappings)
+    {
+      SequenceI dna = ssm.fromSeq;
+      if (dna instanceof SequenceDummy
+              && dna.getName().equals(ds.getName()))
+      {
+        Mapping mapping = ssm.mapping;
+        int mapStart = mapping.getMap().getFromLowest();
+        int mapEnd = mapping.getMap().getFromHighest();
+        boolean mappable = couldRealiseSequence(dna, ds, mapStart, mapEnd);
+        if (mappable)
+        {
+          count++;
+          if (doUpdate)
+          {
+            // TODO: new method ? ds.realise(dna);
+            // might want to copy database refs as well
+            ds.setSequenceFeatures(dna.getSequenceFeatures());
+            // the real sequence takes over as the 'from' end of the mapping
+            ssm.fromSeq = ds;
+            System.out.println("Realised mapped sequence " + ds.getName());
+          }
+        }
+      }
+
+      /*
+       * check for replaceable protein ('map to') sequences
+       */
+      Mapping mapping = ssm.mapping;
+      SequenceI prot = mapping.getTo();
+      int mapStart = mapping.getMap().getToLowest();
+      int mapEnd = mapping.getMap().getToHighest();
+      boolean mappable = couldRealiseSequence(prot, ds, mapStart, mapEnd);
+      if (mappable)
+      {
+        count++;
+        if (doUpdate)
+        {
+          // TODO: new method ? ds.realise(prot);
+          // copy features from the dummy being replaced (prot, not dna)
+          ds.setSequenceFeatures(prot.getSequenceFeatures());
+          ssm.mapping.setTo(ds);
+        }
+      }
+    }
+    return count;
+  }
+
+  /**
+   * Tests whether a 'real' sequence could replace a 'dummy' sequence in the
+   * map: the existing sequence must be a SequenceDummy, the replacement must
+   * not be, their names must match, and the mapped range must share at least
+   * one position with the replacement (or fully enclose it).
+   * 
+   * @param existing
+   *          the currently mapped sequence
+   * @param replacement
+   *          the candidate real sequence
+   * @param mapStart
+   *          start of the mapped range
+   * @param mapEnd
+   *          end of the mapped range
+   * @return true if the replacement could stand in for the existing sequence
+   */
+  protected static boolean couldRealiseSequence(SequenceI existing,
+          SequenceI replacement, int mapStart, int mapEnd)
+  {
+    if (!(existing instanceof SequenceDummy)
+            || replacement instanceof SequenceDummy
+            || !existing.getName().equals(replacement.getName()))
+    {
+      return false;
+    }
+    // two closed ranges intersect iff each starts no later than the other ends
+    return mapStart <= replacement.getEnd()
+            && mapEnd >= replacement.getStart();
+  }
+
+  /**
+   * Re-points any mapping that refers to the given sequence so that it refers
+   * to that sequence's dataset sequence instead. For use when mappings are
+   * created before their referenced sequences are instantiated, for example
+   * when parsing GFF data. Does nothing if seq is null or has no dataset
+   * sequence.
+   * 
+   * @param seq
+   *          the sequence whose mappings should move to its dataset sequence
+   */
+  public void updateToDataset(SequenceI seq)
+  {
+    SequenceI dsSeq = (seq == null) ? null : seq
+            .getDatasetSequence();
+    if (dsSeq == null)
+    {
+      return;
+    }
+
+    for (SequenceToSequenceMapping entry : mappings)
+    {
+      // the 'from' end of the mapping
+      if (entry.fromSeq == seq)
+      {
+        entry.fromSeq = dsSeq;
+      }
+
+      // the 'to' end of the mapping
+      if (entry.mapping.to == seq)
+      {
+        entry.mapping.to = dsSeq;
+      }
+    }
+  }
+
+  /**
+   * Answers true if this object contains no mappings.
+   * 
+   * @return true if no sequence-to-sequence mappings are held, else false
+   */
+  public boolean isEmpty()
+  {
+    return mappings.isEmpty();
+  }
 }
index 7ea9985..1134857 100755 (executable)
@@ -22,13 +22,14 @@ package jalview.datamodel;
 
 import jalview.analysis.AlignmentUtils;
 import jalview.io.FastaFile;
+import jalview.util.Comparison;
 import jalview.util.MessageManager;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Hashtable;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -47,8 +48,7 @@ public class Alignment implements AlignmentI
 
   protected List<SequenceI> sequences;
 
-  protected List<SequenceGroup> groups = java.util.Collections
-          .synchronizedList(new ArrayList<SequenceGroup>());
+  protected List<SequenceGroup> groups;
 
   protected char gapCharacter = '-';
 
@@ -60,20 +60,21 @@ public class Alignment implements AlignmentI
 
   public boolean hasRNAStructure = false;
 
-  /** DOCUMENT ME!! */
   public AlignmentAnnotation[] annotations;
 
-  HiddenSequences hiddenSequences = new HiddenSequences(this);
+  HiddenSequences hiddenSequences;
 
   public Hashtable alignmentProperties;
 
-  private Set<AlignedCodonFrame> codonFrameList = new LinkedHashSet<AlignedCodonFrame>();
+  private List<AlignedCodonFrame> codonFrameList;
 
   private void initAlignment(SequenceI[] seqs)
   {
-    int i = 0;
+    groups = Collections.synchronizedList(new ArrayList<SequenceGroup>());
+    hiddenSequences = new HiddenSequences(this);
+    codonFrameList = new ArrayList<AlignedCodonFrame>();
 
-    if (jalview.util.Comparison.isNucleotide(seqs))
+    if (Comparison.isNucleotide(seqs))
     {
       type = NUCLEOTIDE;
     }
@@ -82,10 +83,9 @@ public class Alignment implements AlignmentI
       type = PROTEIN;
     }
 
-    sequences = java.util.Collections
-            .synchronizedList(new ArrayList<SequenceI>());
+    sequences = Collections.synchronizedList(new ArrayList<SequenceI>());
 
-    for (i = 0; i < seqs.length; i++)
+    for (int i = 0; i < seqs.length; i++)
     {
       sequences.add(seqs[i]);
     }
@@ -104,13 +104,12 @@ public class Alignment implements AlignmentI
       seqs[i] = new Sequence(seqs[i]);
     }
 
+    initAlignment(seqs);
+
     /*
-     * Share the same dataset sequence mappings (if any). TODO: find a better
-     * place for these to live (alignment dataset?).
+     * Share the same dataset sequence mappings (if any). 
      */
-    this.codonFrameList = ((Alignment) al).codonFrameList;
-
-    initAlignment(seqs);
+    this.setCodonFrames(al.getCodonFrames());
   }
 
   /**
@@ -991,25 +990,7 @@ public class Alignment implements AlignmentI
   {
     if (dataset == null && data == null)
     {
-      // Create a new dataset for this alignment.
-      // Can only be done once, if dataset is not null
-      // This will not be performed
-      SequenceI[] seqs = new SequenceI[getHeight()];
-      SequenceI currentSeq;
-      for (int i = 0; i < getHeight(); i++)
-      {
-        currentSeq = getSequenceAt(i);
-        if (currentSeq.getDatasetSequence() != null)
-        {
-          seqs[i] = currentSeq.getDatasetSequence();
-        }
-        else
-        {
-          seqs[i] = currentSeq.createDatasetSequence();
-        }
-      }
-
-      dataset = new Alignment(seqs);
+      createDatasetAlignment();
     }
     else if (dataset == null && data != null)
     {
@@ -1040,6 +1021,37 @@ public class Alignment implements AlignmentI
   }
 
   /**
+   * Creates a new dataset for this alignment. Can only be done once - if
+   * dataset is not null this will not be performed.
+   */
+  public void createDatasetAlignment()
+  {
+    if (dataset != null)
+    {
+      return;
+    }
+
+    /*
+     * gather one dataset sequence per aligned sequence, creating any missing
+     */
+    SequenceI[] dsSeqs = new SequenceI[getHeight()];
+    for (int row = 0; row < dsSeqs.length; row++)
+    {
+      SequenceI aligned = getSequenceAt(row);
+      SequenceI ds = aligned.getDatasetSequence();
+      dsSeqs[row] = (ds != null) ? ds : aligned.createDatasetSequence();
+    }
+
+    dataset = new Alignment(dsSeqs);
+    /*
+     * hand the mappings over to the dataset alignment and drop the local
+     * reference; getCodonFrames() reads from the dataset once it is set
+     */
+    dataset.codonFrameList = this.codonFrameList;
+    this.codonFrameList = null;
+  }
+
+  /**
    * reference count for number of alignments referencing this one.
    */
   int alignmentRefs = 0;
@@ -1261,19 +1273,17 @@ public class Alignment implements AlignmentI
     return alignmentProperties;
   }
 
-  /*
-   * (non-Javadoc)
-   * 
-   * @see
-   * jalview.datamodel.AlignmentI#addCodonFrame(jalview.datamodel.AlignedCodonFrame
-   * )
+  /**
+   * Adds the given mapping to the stored set. Note this may be held on the
+   * dataset alignment.
    */
   @Override
   public void addCodonFrame(AlignedCodonFrame codons)
   {
-    if (codons != null)
+    List<AlignedCodonFrame> acfs = getCodonFrames();
+    if (codons != null && acfs != null && !acfs.contains(codons))
     {
-      codonFrameList.add(codons);
+      acfs.add(codons);
     }
   }
 
@@ -1291,7 +1301,7 @@ public class Alignment implements AlignmentI
       return null;
     }
     List<AlignedCodonFrame> cframes = new ArrayList<AlignedCodonFrame>();
-    for (AlignedCodonFrame acf : codonFrameList)
+    for (AlignedCodonFrame acf : getCodonFrames())
     {
       if (acf.involvesSequence(seq))
       {
@@ -1302,42 +1312,50 @@ public class Alignment implements AlignmentI
   }
 
   /**
-   * Sets the codon frame mappings (replacing any existing mappings).
+   * Sets the codon frame mappings (replacing any existing mappings). Note the
+   * mappings are set on the dataset alignment instead if there is one.
    * 
    * @see jalview.datamodel.AlignmentI#setCodonFrames()
    */
   @Override
-  public void setCodonFrames(Set<AlignedCodonFrame> acfs)
+  public void setCodonFrames(List<AlignedCodonFrame> acfs)
   {
-    this.codonFrameList = acfs;
+    if (dataset != null)
+    {
+      dataset.setCodonFrames(acfs);
+    }
+    else
+    {
+      this.codonFrameList = acfs;
+    }
   }
 
   /**
    * Returns the set of codon frame mappings. Any changes to the returned set
-   * will affect the alignment.
+   * will affect the alignment. The mappings are held on (and read from) the
+   * dataset alignment if there is one.
    * 
    * @see jalview.datamodel.AlignmentI#getCodonFrames()
    */
   @Override
-  public Set<AlignedCodonFrame> getCodonFrames()
+  public List<AlignedCodonFrame> getCodonFrames()
   {
-    return codonFrameList;
+    return dataset != null ? dataset.getCodonFrames() : codonFrameList;
   }
 
-  /*
-   * (non-Javadoc)
-   * 
-   * @seejalview.datamodel.AlignmentI#removeCodonFrame(jalview.datamodel.
-   * AlignedCodonFrame)
+  /**
+   * Removes the given mapping from the stored set. Note that the mappings are
+   * held on the dataset alignment if there is one.
    */
   @Override
   public boolean removeCodonFrame(AlignedCodonFrame codons)
   {
-    if (codons == null || codonFrameList == null)
+    List<AlignedCodonFrame> acfs = getCodonFrames();
+    if (codons == null || acfs == null)
     {
       return false;
     }
-    return codonFrameList.remove(codons);
+    return acfs.remove(codons);
   }
 
   @Override
@@ -1383,7 +1401,7 @@ public class Alignment implements AlignmentI
       addAnnotation(alan[a]);
     }
 
-    this.codonFrameList.addAll(toappend.getCodonFrames());
+    getCodonFrames().addAll(toappend.getCodonFrames());
 
     List<SequenceGroup> sg = toappend.getGroups();
     if (sg != null)
@@ -1595,6 +1613,7 @@ public class Alignment implements AlignmentI
    * 
    * @return the representative sequence for this group
    */
+  @Override
   public SequenceI getSeqrep()
   {
     return seqrep;
@@ -1607,6 +1626,7 @@ public class Alignment implements AlignmentI
    * @param seqrep
    *          the seqrep to set (null means no sequence representative)
    */
+  @Override
   public void setSeqrep(SequenceI seqrep)
   {
     this.seqrep = seqrep;
@@ -1616,6 +1636,7 @@ public class Alignment implements AlignmentI
    * 
    * @return true if group has a sequence representative
    */
+  @Override
   public boolean hasSeqrep()
   {
     return seqrep != null;
@@ -1748,4 +1769,46 @@ public class Alignment implements AlignmentI
     }
     return hasValidSeq;
   }
+
+  /**
+   * Update any mappings to 'virtual' sequences to compatible real ones, if
+   * present in the added sequences. Returns a count of mappings updated.
+   * Answers zero, without error, if either the given list or this alignment's
+   * list of codon frame mappings is null.
+   *
+   * @param seqs
+   *          the sequences to offer to each mapping (may be null)
+   * @return the sum of the values returned by realiseWith for each
+   *         combination of sequence and mapping
+   */
+  @Override
+  public int realiseMappings(List<SequenceI> seqs)
+  {
+    /*
+     * removeCodonFrame allows for getCodonFrames() being null,
+     * so make the same allowance here rather than throw an NPE
+     */
+    List<AlignedCodonFrame> acfs = getCodonFrames();
+    if (seqs == null || acfs == null)
+    {
+      return 0;
+    }
+
+    int count = 0;
+    for (SequenceI seq : seqs)
+    {
+      for (AlignedCodonFrame mapping : acfs)
+      {
+        count += mapping.realiseWith(seq);
+      }
+    }
+    return count;
+  }
+
+  /**
+   * Returns the first AlignedCodonFrame that has a mapping between the given
+   * dataset sequences, or null if there is none (including the case where
+   * this alignment has no list of codon frame mappings).
+   *
+   * @param mapFrom
+   *          the sequence passed to getAaForDnaSeq on each mapping
+   * @param mapTo
+   *          the sequence that lookup must return (compared by identity)
+   * @return the first matching mapping, or null if none matches
+   */
+  @Override
+  public AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo)
+  {
+    /*
+     * removeCodonFrame allows for getCodonFrames() being null,
+     * so make the same allowance here rather than throw an NPE
+     */
+    List<AlignedCodonFrame> acfs = getCodonFrames();
+    if (acfs == null)
+    {
+      return null;
+    }
+
+    for (AlignedCodonFrame acf : acfs)
+    {
+      if (acf.getAaForDnaSeq(mapFrom) == mapTo)
+      {
+        return acf;
+      }
+    }
+    return null;
+  }
 }
index de79488..396ef2d 100755 (executable)
@@ -375,12 +375,12 @@ public interface AlignmentI extends AnnotatedCollectionI
    * 
    * @return
    */
-  Set<AlignedCodonFrame> getCodonFrames();
+  List<AlignedCodonFrame> getCodonFrames();
 
   /**
-   * Set the codon frame mappings (replacing any existing set).
+   * Set the codon frame mappings (replacing any existing list).
    */
-  void setCodonFrames(Set<AlignedCodonFrame> acfs);
+  void setCodonFrames(List<AlignedCodonFrame> acfs);
 
   /**
    * get codon frames involving sequenceI
@@ -524,4 +524,23 @@ public interface AlignmentI extends AnnotatedCollectionI
    * @return
    */
   public boolean hasValidSequence();
+
+  /**
+   * Update any mappings to 'virtual' sequences to compatible real ones, if
+   * present in the added sequences. Returns a count of mappings updated.
+   * 
+   * @param seqs
+   * @return
+   */
+  int realiseMappings(List<SequenceI> seqs);
+
+  /**
+   * Returns the first AlignedCodonFrame that has a mapping between the given
+   * dataset sequences
+   * 
+   * @param mapFrom
+   * @param mapTo
+   * @return
+   */
+  AlignedCodonFrame getMapping(SequenceI mapFrom, SequenceI mapTo);
 }
index 6a676cf..8783e4f 100755 (executable)
@@ -79,62 +79,18 @@ public class DBRefSource
   public static final String GENEDB = "GeneDB".toUpperCase();
 
   /**
-   * List of databases whose sequences might have coding regions annotated
-   */
-  public static final String[] DNACODINGDBS = { EMBL, EMBLCDS, GENEDB };
-
-  public static final String[] CODINGDBS = { EMBLCDS, GENEDB };
-
-  public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB,
-      EMBLCDSProduct };
-
-  public static final String[] PROTEINSEQ = { UNIPROT, UNIPROTKB,
-      EMBLCDSProduct };
-
-  public static final String[] PROTEINSTR = { PDB };
-
-  public static final String[] DOMAINDBS = { PFAM, RFAM };
-
-  /**
-   * set of unique DBRefSource property constants. These could be used to
-   * reconstruct the above groupings
-   */
-  public static final Object SEQDB = "SQ";
-
-  /**
-   * database of nucleic acid sequences
-   */
-  public static final Object DNASEQDB = "NASQ";
-
-  /**
-   * database of amino acid sequences
-   */
-  public static final Object PROTSEQDB = "PROTSQ";
-
-  /**
-   * database of cDNA sequences
-   */
-  public static final Object CODINGSEQDB = "CODING";
-
-  /**
-   * database of na sequences with exon annotation
+   * Ensembl
    */
-  public static final Object DNACODINGSEQDB = "XONCODING";
+  public static final String ENSEMBL = "ENSEMBL";
 
   /**
-   * DB returns several sequences associated with a protein/nucleotide domain
+   * List of databases whose sequences might have coding regions annotated
    */
-  public static final Object DOMAINDB = "DOMAIN";
+  public static final String[] DNACODINGDBS = { EMBL, EMBLCDS, GENEDB,
+      ENSEMBL };
 
-  /**
-   * DB query can take multiple accession codes concatenated by a separator.
-   * Value of property indicates maximum number of accession codes to send at a
-   * time.
-   */
-  public static final Object MULTIACC = "MULTIACC";
+  public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
 
-  /**
-   * DB query returns an alignment for each accession provided.
-   */
-  public static final Object ALIGNMENTDB = "ALIGNMENTS";
+  public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB,
+      EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein
 }
index 6c619ce..c4c4a2a 100644 (file)
@@ -693,6 +693,7 @@ public class Mapping
    * 
    * @see java.lang.Object#finalize()
    */
+  @Override
   protected void finalize() throws Throwable
   {
     map = null;
@@ -705,4 +706,14 @@ public class Mapping
     return new AlignedCodonIterator(seq, gapChar);
   }
 
+  /**
+   * Readable representation for debugging only, not guaranteed not to change.
+   * Safe to call when either the map or the 'to' sequence is null.
+   */
+  @Override
+  public String toString()
+  {
+    /*
+     * pass the map itself (not map.toString()) so that %s renders "null"
+     * instead of throwing a NullPointerException; finalize() nulls the map
+     */
+    return String.format("%s %s", this.map, this.to == null ? ""
+            : this.to.getName());
+  }
+
 }
diff --git a/src/jalview/datamodel/MappingType.java b/src/jalview/datamodel/MappingType.java
new file mode 100644 (file)
index 0000000..c0c69aa
--- /dev/null
@@ -0,0 +1,63 @@
+package jalview.datamodel;
+
+/**
+ * An enumeration of the kinds of mapping (from nucleotide or peptide, to
+ * nucleotide or peptide), and the corresponding word lengths. Each constant
+ * holds a 'from' and a 'to' ratio, and knows its inverse (the constant with
+ * 'from' and 'to' exchanged).
+ */
+public enum MappingType
+{
+  /**
+   * from ratio 3, to ratio 1
+   */
+  NucleotideToPeptide(3, 1)
+  {
+    @Override
+    public MappingType getInverse()
+    {
+      return PeptideToNucleotide;
+    }
+  },
+
+  /**
+   * from ratio 1, to ratio 3
+   */
+  PeptideToNucleotide(1, 3)
+  {
+    @Override
+    public MappingType getInverse()
+    {
+      return NucleotideToPeptide;
+    }
+  },
+
+  /**
+   * from ratio 1, to ratio 1; is its own inverse
+   */
+  NucleotideToNucleotide(1, 1)
+  {
+    @Override
+    public MappingType getInverse()
+    {
+      return NucleotideToNucleotide;
+    }
+  },
+
+  /**
+   * from ratio 1, to ratio 1; is its own inverse
+   */
+  PeptideToPeptide(1, 1)
+  {
+    @Override
+    public MappingType getInverse()
+    {
+      return PeptideToPeptide;
+    }
+  };
+
+  /*
+   * set once in the constructor; final so that the shared enum
+   * constants cannot be modified after construction
+   */
+  private final int fromRatio;
+
+  private final int toRatio;
+
+  private MappingType(int fromSize, int toSize)
+  {
+    fromRatio = fromSize;
+    toRatio = toSize;
+  }
+
+  /**
+   * Returns the mapping type for the reverse direction. Implemented per
+   * constant, since a constant cannot refer to a later-declared constant in
+   * its constructor arguments.
+   *
+   * @return
+   */
+  public abstract MappingType getInverse();
+
+  /**
+   * Returns the 'from' ratio (first constructor argument) of this type
+   *
+   * @return
+   */
+  public int getFromRatio()
+  {
+    return fromRatio;
+  }
+
+  /**
+   * Returns the 'to' ratio (second constructor argument) of this type
+   *
+   * @return
+   */
+  public int getToRatio()
+  {
+    return toRatio;
+  }
+}
index ad0e472..b9db461 100755 (executable)
@@ -67,8 +67,22 @@ public class SearchResults
     public Match(SequenceI seq, int start, int end)
     {
       sequence = seq;
-      this.start = start;
-      this.end = end;
+
+      /*
+       * always hold in forwards order, even if given in reverse order
+       * (such as from a mapping to a reverse strand); this avoids
+       * trouble for routines that highlight search results etc
+       */
+      if (start <= end)
+      {
+        this.start = start;
+        this.end = end;
+      }
+      else
+      {
+        this.start = end;
+        this.end = start;
+      }
     }
 
     public SequenceI getSequence()
index ac2f9c1..7b05649 100755 (executable)
@@ -265,11 +265,10 @@ public class Sequence extends ASequence implements SequenceI
     }
     if (seq.getAllPDBEntries() != null)
     {
-      Vector ids = seq.getAllPDBEntries();
-      Enumeration e = ids.elements();
-      while (e.hasMoreElements())
+      Vector<PDBEntry> ids = seq.getAllPDBEntries();
+      for (PDBEntry pdb : ids)
       {
-        this.addPDBId(new PDBEntry((PDBEntry) e.nextElement()));
+        this.addPDBId(new PDBEntry(pdb));
       }
     }
   }
@@ -289,6 +288,7 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public synchronized void addSequenceFeature(SequenceFeature sf)
   {
+    // TODO add to dataset sequence instead if there is one?
     if (sequenceFeatures == null)
     {
       sequenceFeatures = new SequenceFeature[0];
@@ -931,6 +931,7 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void addDBRef(DBRefEntry entry)
   {
+    // TODO add to dataset sequence instead if there is one?
     if (dbrefs == null)
     {
       dbrefs = new DBRefEntry[0];
@@ -964,6 +965,7 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public void setDatasetSequence(SequenceI seq)
   {
+    // TODO check for circular reference before setting?
     datasetSequence = seq;
   }
 
index 7e3c187..172c25f 100644 (file)
@@ -20,7 +20,7 @@
  */
 package jalview.datamodel;
 
-public class SequenceDummy extends Sequence implements SequenceI
+public class SequenceDummy extends Sequence
 {
   public SequenceDummy(String sequenceId)
   {
@@ -50,4 +50,14 @@ public class SequenceDummy extends Sequence implements SequenceI
   {
     return dummy;
   }
+
+  /**
+   * Always suppress /start-end for display name as we don't know it. The
+   * argument is ignored: the superclass method is always called with false.
+   *
+   * @param jvsuffix
+   *          ignored
+   */
+  @Override
+  public String getDisplayId(boolean jvsuffix)
+  {
+    // required for correct behaviour of SequenceIdMatcher
+    return super.getDisplayId(false);
+  }
 }
index 1b6498f..252f46c 100755 (executable)
@@ -20,7 +20,8 @@
  */
 package jalview.datamodel;
 
-import java.util.Hashtable;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Vector;
 
 /**
@@ -31,6 +32,15 @@ import java.util.Vector;
  */
 public class SequenceFeature
 {
+  private static final String STATUS = "status";
+
+  private static final String STRAND = "STRAND";
+
+  // private key for Phase designed not to conflict with real GFF data
+  private static final String PHASE = "!Phase";
+
+  private static final String ATTRIBUTES = "ATTRIBUTES";
+
   public int begin;
 
   public int end;
@@ -41,7 +51,7 @@ public class SequenceFeature
 
   public String description;
 
-  public Hashtable otherDetails;
+  public Map<String, Object> otherDetails;
 
   public Vector<String> links;
 
@@ -54,9 +64,9 @@ public class SequenceFeature
   }
 
   /**
-   * Constructs a duplicate feature. Note: Uses clone on the otherDetails so
-   * only shallow copies are made of additional properties and method will
-   * silently fail if unclonable objects are found in the hash.
+   * Constructs a duplicate feature. Note: makes a shallow copy of the
+   * otherDetails map, so the new and original SequenceFeature may reference the
+   * same objects in the map.
    * 
    * @param cpy
    */
@@ -83,10 +93,11 @@ public class SequenceFeature
       {
         try
         {
-          otherDetails = (Hashtable) cpy.otherDetails.clone();
+          otherDetails = (Map<String, Object>) ((HashMap<String, Object>) cpy.otherDetails)
+                  .clone();
         } catch (Exception e)
         {
-          // Uncloneable objects in the otherDetails - don't complain
+          // ignore
         }
       }
       if (cpy.links != null && cpy.links.size() > 0)
@@ -105,7 +116,7 @@ public class SequenceFeature
   {
     this.type = type;
     this.description = desc;
-    setValue("status", status);
+    setValue(STATUS, status);
     this.begin = begin;
     this.end = end;
     this.featureGroup = featureGroup;
@@ -229,7 +240,7 @@ public class SequenceFeature
   }
 
   /**
-   * Used for getting values which are not in the basic set. eg STRAND, FRAME
+   * Used for getting values which are not in the basic set. eg STRAND, PHASE
    * for GFF file
    * 
    * @param key
@@ -248,6 +259,20 @@ public class SequenceFeature
   }
 
   /**
+   * Returns a property value for the given key if known, else the specified
+   * default value
+   * 
+   * @param key
+   * @param defaultValue
+   * @return
+   */
+  public Object getValue(String key, Object defaultValue)
+  {
+    Object value = getValue(key);
+    return value == null ? defaultValue : value;
+  }
+
+  /**
    * Used for setting values which are not in the basic set. eg STRAND, FRAME
    * for GFF file
    * 
@@ -262,7 +287,7 @@ public class SequenceFeature
     {
       if (otherDetails == null)
       {
-        otherDetails = new Hashtable();
+        otherDetails = new HashMap<String, Object>();
       }
 
       otherDetails.put(key, value);
@@ -275,20 +300,22 @@ public class SequenceFeature
    */
   public void setStatus(String status)
   {
-    setValue("status", status);
+    setValue(STATUS, status);
   }
 
   public String getStatus()
   {
-    if (otherDetails != null)
-    {
-      String stat = (String) otherDetails.get("status");
-      if (stat != null)
-      {
-        return new String(stat);
-      }
-    }
-    return null;
+    return (String) getValue(STATUS);
+  }
+
+  public void setAttributes(String attr)
+  {
+    setValue(ATTRIBUTES, attr);
+  }
+
+  public String getAttributes()
+  {
+    return (String) getValue(ATTRIBUTES);
   }
 
   public void setPosition(int pos)
@@ -302,23 +329,53 @@ public class SequenceFeature
     return begin;
   }
 
+  /**
+   * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
+   * GFF), and 0 for unknown or not (validly) specified
+   * 
+   * @return
+   */
   public int getStrand()
   {
-    String str;
-    if (otherDetails == null
-            || (str = otherDetails.get("STRAND").toString()) == null)
-    {
-      return 0;
-    }
-    if (str.equals("-"))
-    {
-      return -1;
-    }
-    if (str.equals("+"))
+    int strand = 0;
+    if (otherDetails != null)
     {
-      return 1;
+      Object str = otherDetails.get(STRAND);
+      if ("-".equals(str))
+      {
+        strand = -1;
+      }
+      else if ("+".equals(str))
+      {
+        strand = 1;
+      }
     }
-    return 0;
+    return strand;
   }
 
+  /**
+   * Stores the given value, via setValue, under the STRAND property key.
+   * getStrand() reports a stored "+" as 1 and "-" as -1, and anything else
+   * as 0.
+   *
+   * @param strand
+   */
+  public void setStrand(String strand)
+  {
+    setValue(STRAND, strand);
+  }
+
+  /**
+   * Stores the given value, via setValue, under the private phase key (which
+   * is chosen so as not to conflict with property names in real GFF data)
+   *
+   * @param phase
+   */
+  public void setPhase(String phase)
+  {
+    setValue(PHASE, phase);
+  }
+
+  /**
+   * Returns the value held under the private phase key, as returned by
+   * getValue and cast to String
+   *
+   * @return
+   */
+  public String getPhase()
+  {
+    return (String) getValue(PHASE);
+  }
+
+  /**
+   * Answers the begin and end positions, type and description, separated by
+   * single spaces. Intended for debugging only; the format may change between
+   * versions.
+   */
+  @Override
+  public String toString()
+  {
+    final String format = "%d %d %s %s";
+    return String.format(format, getBegin(), getEnd(), getType(),
+            getDescription());
+  }
 }
diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java
new file mode 100644 (file)
index 0000000..a2ecfcd
--- /dev/null
@@ -0,0 +1,81 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntology;
+
+import com.stevesoft.pat.Regex;
+
+/**
+ * An Ensembl sequence proxy configured for the CDNA sequence type. 'exon'
+ * features whose Parent is the requested transcript identify the sequence.
+ */
+public class EnsemblCdna extends EnsemblSeqProxy
+{
+  /*
+   * fetch exon features on genomic sequence (to identify the cDNA regions)
+   * and cds and variation features (to retain)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.exon, EnsemblFeatureType.cds,
+      EnsemblFeatureType.variation };
+
+  public EnsemblCdna()
+  {
+    super();
+  }
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (CDNA)";
+  }
+
+  @Override
+  protected EnsemblSeqType getSourceEnsemblType()
+  {
+    return EnsemblSeqType.CDNA;
+  }
+
+  /**
+   * Answers a new Regex on each call, accepting ENST, ENSG or CCDS followed
+   * by at least three characters which are digits or '.'
+   */
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return new Regex("((ENST|ENSG|CCDS)[0-9.]{3,})");
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Answers false if the feature type is 'transcript' (or a sub-type in the
+   * Sequence Ontology), otherwise the result of featureMayBelong for the
+   * feature and accession id.
+   */
+  @Override
+  protected boolean retainFeature(SequenceFeature sf, String accessionId)
+  {
+    return !isTranscript(sf.getType())
+            && featureMayBelong(sf, accessionId);
+  }
+
+  /**
+   * Answers true if the sequence feature type is 'exon' (or a subtype of exon
+   * in the Sequence Ontology), and the Parent of the feature is the transcript
+   * we are retrieving
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    boolean isExon = SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.EXON);
+    if (!isExon)
+    {
+      return false;
+    }
+    String parentFeature = (String) sf.getValue(PARENT);
+    return ("transcript:" + accId).equals(parentFeature);
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblCds.java b/src/jalview/ext/ensembl/EnsemblCds.java
new file mode 100644 (file)
index 0000000..7d0b6fd
--- /dev/null
@@ -0,0 +1,79 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.SequenceFeature;
+import jalview.io.gff.SequenceOntology;
+
+/**
+ * An Ensembl sequence proxy configured for the CDS sequence type. 'CDS'
+ * features whose Parent is the requested transcript identify the sequence.
+ */
+public class EnsemblCds extends EnsemblSeqProxy
+{
+  /*
+   * fetch cds features on genomic sequence (to identify the CDS regions)
+   * and exon and variation features (to retain for display)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.cds, EnsemblFeatureType.exon,
+      EnsemblFeatureType.variation };
+
+  /**
+   * Constructor
+   */
+  public EnsemblCds()
+  {
+    super();
+  }
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (CDS)";
+  }
+
+  @Override
+  protected EnsemblSeqType getSourceEnsemblType()
+  {
+    return EnsemblSeqType.CDS;
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Answers false if the feature type is 'CDS' (or a sub-type of CDS in the
+   * Sequence Ontology), otherwise the result of featureMayBelong. CDS
+   * features are only retrieved in order to identify the cds sequence range,
+   * and are redundant information on the cds sequence itself.
+   */
+  @Override
+  protected boolean retainFeature(SequenceFeature sf, String accessionId)
+  {
+    return !isCds(sf) && featureMayBelong(sf, accessionId);
+  }
+
+  /**
+   * Answers true if the sequence feature type is 'CDS' (or a subtype of CDS in
+   * the Sequence Ontology), and the Parent of the feature is the transcript we
+   * are retrieving
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    if (!isCds(sf))
+    {
+      return false;
+    }
+    String parentFeature = (String) sf.getValue(PARENT);
+    return ("transcript:" + accId).equals(parentFeature);
+  }
+
+  /**
+   * Answers true if the feature's type is CDS, or a sub-type of CDS, according
+   * to the Sequence Ontology
+   */
+  private boolean isCds(SequenceFeature sf)
+  {
+    return SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.CDS);
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java
new file mode 100644 (file)
index 0000000..b5ea686
--- /dev/null
@@ -0,0 +1,293 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntology;
+import jalview.util.MapList;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * A class that fetches genomic sequence and all transcripts for an Ensembl gene
+ *
+ * @author gmcarstairs
+ */
+public class EnsemblGene extends EnsemblSeqProxy
+{
+  /*
+   * prefixes which, followed by an accession id, form the ID or Parent
+   * property values that are matched against features in this class
+   */
+  private static final String GENE_PREFIX = "gene:";
+
+  private static final String TRANSCRIPT_PREFIX = "transcript:";
+
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.gene, EnsemblFeatureType.transcript,
+      EnsemblFeatureType.exon, EnsemblFeatureType.cds,
+      EnsemblFeatureType.variation };
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (GENE)";
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  @Override
+  protected EnsemblSeqType getSourceEnsemblType()
+  {
+    return EnsemblSeqType.GENOMIC;
+  }
+
+  /**
+   * Builds an alignment of all transcripts for the requested gene:
+   * <ul>
+   * <li>fetches the gene sequence</li>
+   * <li>fetches features on the sequence</li>
+   * <li>identifies "transcript" features whose Parent is the requested gene</li>
+   * <li>fetches the transcript sequence for each transcript</li>
+   * <li>makes a mapping from the gene to each transcript</li>
+   * <li>copies features from gene to transcript sequences</li>
+   * <li>fetches the protein sequence for each transcript, maps and saves it as
+   * a cross-reference</li>
+   * <li>aligns each transcript against the gene sequence based on the position
+   * mappings</li>
+   * </ul>
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String query) throws Exception
+  {
+    // TODO ? if an ENST identifier is supplied, convert to ENSG?
+    AlignmentI al = super.getSequenceRecords(query);
+    if (al.getHeight() > 0)
+    {
+      getTranscripts(al, query);
+    }
+
+    return al;
+  }
+
+  /**
+   * Constructs all transcripts for the gene, as identified by "transcript"
+   * features whose Parent is the requested gene. The coding transcript
+   * sequences (i.e. with introns omitted) are added to the alignment.
+   *
+   * @param al
+   *          the alignment, whose first sequence is taken to be the gene
+   * @param accId
+   *          the gene accession id
+   * @throws Exception
+   */
+  protected void getTranscripts(AlignmentI al, String accId)
+          throws Exception
+  {
+    SequenceI gene = al.getSequenceAt(0);
+    List<SequenceFeature> transcriptFeatures = getTranscriptFeatures(accId,
+            gene);
+
+    for (SequenceFeature transcriptFeature : transcriptFeatures)
+    {
+      makeTranscript(transcriptFeature, al, gene);
+    }
+  }
+
+  /**
+   * Constructs a spliced transcript sequence by finding 'exon' features for the
+   * given id (or failing that 'CDS'). Copies features on to the new sequence.
+   * 'Aligns' the new sequence against the gene sequence by padding with gaps,
+   * and adds it to the alignment.
+   *
+   * @param transcriptFeature
+   *          a feature whose "transcript_id" property gives the transcript
+   *          accession id
+   * @param al
+   *          the alignment to which to add the new sequence
+   * @param gene
+   *          the parent gene sequence, with features
+   * @return the new transcript sequence, or null if the feature has no
+   *         "transcript_id" property
+   */
+  SequenceI makeTranscript(SequenceFeature transcriptFeature,
+          AlignmentI al, SequenceI gene)
+  {
+    String accId = (String) transcriptFeature.getValue("transcript_id");
+    if (accId == null)
+    {
+      return null;
+    }
+
+    /*
+     * NB we are mapping from gene sequence (not genome), so do not
+     * need to check for reverse strand (gene and transcript sequences
+     * are in forward sense)
+     */
+
+    /*
+     * make a gene-length sequence filled with gaps
+     * we will fill in the bases for transcript regions
+     */
+    char[] seqChars = new char[gene.getLength()];
+    Arrays.fill(seqChars, al.getGapCharacter());
+
+    /*
+     * look for exon features of the transcript, failing that for CDS
+     * (for example ENSG00000124610 has 1 CDS but no exon features)
+     */
+    String parentId = TRANSCRIPT_PREFIX + accId;
+    List<SequenceFeature> splices = findFeatures(gene,
+            SequenceOntology.EXON, parentId);
+    if (splices.isEmpty())
+    {
+      splices = findFeatures(gene, SequenceOntology.CDS, parentId);
+    }
+
+    int transcriptLength = 0;
+    final char[] geneChars = gene.getSequence();
+    int offset = gene.getStart(); // to convert to 0-based positions
+    List<int[]> mappedFrom = new ArrayList<int[]>();
+
+    for (SequenceFeature sf : splices)
+    {
+      int start = sf.getBegin() - offset;
+      int end = sf.getEnd() - offset;
+      int spliceLength = end - start + 1;
+      // copy the spliced region into the same columns of the gapped sequence
+      System.arraycopy(geneChars, start, seqChars, start, spliceLength);
+      transcriptLength += spliceLength;
+      mappedFrom.add(new int[] { sf.getBegin(), sf.getEnd() });
+    }
+
+    Sequence transcript = new Sequence(accId, seqChars, 1, transcriptLength);
+
+    /*
+     * the Name property of the transcript feature (if any)
+     * becomes the description of the new sequence
+     */
+    String name = (String) transcriptFeature.getValue(NAME);
+    if (name != null)
+    {
+      transcript.setDescription(name);
+    }
+    transcript.createDatasetSequence();
+
+    al.addSequence(transcript);
+
+    /*
+     * transfer features to the new sequence; we use EnsemblCdna to do this,
+     * to filter out unwanted features types (see method retainFeature)
+     */
+    List<int[]> mapTo = new ArrayList<int[]>();
+    mapTo.add(new int[] { 1, transcriptLength });
+    MapList mapping = new MapList(mappedFrom, mapTo, 1, 1);
+    new EnsemblCdna().transferFeatures(gene.getSequenceFeatures(),
+            transcript.getDatasetSequence(), mapping, parentId);
+
+    /*
+     * and finally fetch the protein product and save as a cross-reference
+     */
+    addProteinProduct(transcript);
+
+    return transcript;
+  }
+
+  /**
+   * Returns a list of the transcript features on the sequence whose Parent is
+   * the gene for the accession id.
+   *
+   * @param accId
+   *          the gene accession id
+   * @param geneSequence
+   *          the sequence whose features are searched
+   * @return a (possibly empty) list, never null
+   */
+  protected List<SequenceFeature> getTranscriptFeatures(String accId,
+          SequenceI geneSequence)
+  {
+    List<SequenceFeature> transcriptFeatures = new ArrayList<SequenceFeature>();
+
+    String parentIdentifier = GENE_PREFIX + accId;
+    SequenceFeature[] sfs = geneSequence.getSequenceFeatures();
+
+    if (sfs != null)
+    {
+      for (SequenceFeature sf : sfs)
+      {
+        if (isTranscript(sf.getType()))
+        {
+          String parent = (String) sf.getValue(PARENT);
+          if (parentIdentifier.equals(parent))
+          {
+            transcriptFeatures.add(sf);
+          }
+        }
+      }
+    }
+
+    return transcriptFeatures;
+  }
+
+  @Override
+  public String getDescription()
+  {
+    return "Fetches all transcripts and variant features for a gene";
+  }
+
+  /**
+   * Default test query is a gene identifier (ENSG...)
+   */
+  @Override
+  public String getTestQuery()
+  {
+    return "ENSG00000157764"; // BRAF, 5 transcripts, reverse strand
+    // ENSG00000090266 // NDUFB2, 15 transcripts, forward strand
+    // ENSG00000101812 // H2BFM histone, 3 transcripts, forward strand
+    // ENSG00000123569 // H2BFWT histone, 2 transcripts, reverse strand
+  }
+
+  /**
+   * Answers true for a feature of type 'gene' (or a sub-type of gene in the
+   * Sequence Ontology), whose ID is the accession we are retrieving
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    if (SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.GENE))
+    {
+      String id = (String) sf.getValue(ID);
+      if ((GENE_PREFIX + accId).equals(id))
+      {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Answers true unless feature type is 'gene', or 'transcript' with a parent
+   * which is a different gene. We need the gene features to identify the range,
+   * but it is redundant information on the gene sequence. Checking the parent
+   * allows us to drop transcript features which belong to different
+   * (overlapping) genes.
+   */
+  @Override
+  protected boolean retainFeature(SequenceFeature sf, String accessionId)
+  {
+    if (SequenceOntology.getInstance().isA(sf.getType(),
+            SequenceOntology.GENE))
+    {
+      return false;
+    }
+
+    if (isTranscript(sf.getType()))
+    {
+      String parent = (String) sf.getValue(PARENT);
+      if (!(GENE_PREFIX + accessionId).equals(parent))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Answers false. This allows an optimisation - a single 'gene' feature is all
+   * that is needed to identify the positions of the gene on the genomic
+   * sequence.
+   */
+  @Override
+  protected boolean isSpliceable()
+  {
+    return false;
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblGenome.java b/src/jalview/ext/ensembl/EnsemblGenome.java
new file mode 100644 (file)
index 0000000..b7db2bc
--- /dev/null
@@ -0,0 +1,73 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.SequenceFeature;
+
+/**
+ * An Ensembl sequence proxy configured for the GENOMIC sequence type.
+ * 'transcript' features whose ID is the requested transcript identify the
+ * sequence.
+ */
+public class EnsemblGenome extends EnsemblSeqProxy
+{
+  /*
+   * fetch transcript features on genomic sequence (to identify the transcript
+   * regions) and cds, exon and variation features (to retain)
+   */
+  private static final EnsemblFeatureType[] FEATURES_TO_FETCH = {
+      EnsemblFeatureType.transcript, EnsemblFeatureType.exon,
+      EnsemblFeatureType.cds, EnsemblFeatureType.variation };
+
+  public EnsemblGenome()
+  {
+    super();
+  }
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (Genome)";
+  }
+
+  @Override
+  protected EnsemblSeqType getSourceEnsemblType()
+  {
+    return EnsemblSeqType.GENOMIC;
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    return FEATURES_TO_FETCH;
+  }
+
+  /**
+   * Answers false if the feature type is 'transcript' (or a sub-type of
+   * transcript in the Sequence Ontology), otherwise the result of
+   * featureMayBelong. Transcript features are only retrieved in order to
+   * identify the transcript sequence range, and are redundant information on
+   * the transcript sequence itself.
+   */
+  @Override
+  protected boolean retainFeature(SequenceFeature sf, String accessionId)
+  {
+    return !isTranscript(sf.getType())
+            && featureMayBelong(sf, accessionId);
+  }
+
+  /**
+   * Answers true if the sequence feature type is 'transcript' (or a subtype of
+   * transcript in the Sequence Ontology), and the ID of the feature is the
+   * transcript we are retrieving
+   */
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    if (!isTranscript(sf.getType()))
+    {
+      return false;
+    }
+    String id = (String) sf.getValue(ID);
+    return ("transcript:" + accId).equals(id);
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblOverlap.java b/src/jalview/ext/ensembl/EnsemblOverlap.java
new file mode 100644 (file)
index 0000000..b1514d8
--- /dev/null
@@ -0,0 +1,125 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.io.FeaturesFile;
+import jalview.io.FileParse;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A client for fetching and processing Ensembl overlap data in GFF feature
+ * format
+ * 
+ * @author gmcarstairs
+ * @see http://rest.ensembl.org/documentation/info/overlap_id
+ */
+public class EnsemblOverlap extends EnsemblRestClient
+{
+  /*
+   * The default features to retrieve from Ensembl; can override in getSequenceRecords
+   */
+  private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds,
+      EnsemblFeatureType.exon, EnsemblFeatureType.variation };
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (overlap)";
+  }
+
+  /**
+   * Makes a query to the REST overlap endpoint for the given sequence
+   * identifier. This returns an 'alignment' consisting of one 'dummy sequence'
+   * (the genomic sequence for which overlap features are returned by the
+   * service). This sequence will have on it sequence features which are the
+   * real information of interest, such as CDS regions or sequence variations.
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String query) throws IOException
+  {
+    long now = System.currentTimeMillis();
+    // TODO: use a vararg String... for getSequenceRecords instead?
+    List<String> queries = new ArrayList<String>();
+    queries.add(query);
+    FileParse fp = getSequenceReader(queries);
+    FeaturesFile fr = new FeaturesFile(fp);
+    System.out.println(getClass().getName() + " took "
+            + (System.currentTimeMillis() - now) + "ms to fetch");
+    return new Alignment(fr.getSeqsAsArray());
+  }
+
+  /**
+   * Returns a URL for the REST overlap endpoint
+   * 
+   * @param ids
+   * @return
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    StringBuffer urlstring = new StringBuffer(128);
+    urlstring.append(ENSEMBL_REST).append("/overlap/id/")
+            .append(ids.get(0));
+
+    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+    urlstring.append("?content-type=text/x-gff3");
+
+    /*
+     * specify  features to retrieve
+     * @see http://rest.ensembl.org/documentation/info/overlap_id
+     * could make the list a configurable entry in jalview.properties
+     */
+    for (EnsemblFeatureType feature : featuresWanted)
+    {
+      urlstring.append("&feature=").append(feature.name());
+    }
+
+    return new URL(urlstring.toString());
+  }
+
+  @Override
+  protected boolean useGetRequest()
+  {
+    return true;
+  }
+
+  /**
+   * Returns the MIME type for GFF3. For GET requests the Content-type header
+   * describes the required encoding of the response.
+   */
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Returns the MIME type for GFF3.
+   */
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "text/x-gff3";
+  }
+
+  /**
+   * Overloaded method that allows a list of features to retrieve to be
+   * specified
+   * 
+   * @param accId
+   * @param features
+   * @return
+   * @throws IOException
+   */
+  protected AlignmentI getSequenceRecords(String accId,
+          EnsemblFeatureType[] features) throws IOException
+  {
+    featuresWanted = features;
+    return getSequenceRecords(accId);
+  }
+}
diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java
new file mode 100644 (file)
index 0000000..5238f98
--- /dev/null
@@ -0,0 +1,67 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+
+public class EnsemblProtein extends EnsemblSeqProxy
+{
+
+  public EnsemblProtein()
+  {
+    super();
+  }
+
+  @Override
+  public String getDbName()
+  {
+    return "ENSEMBL (Protein)";
+  }
+
+  @Override
+  protected EnsemblSeqType getSourceEnsemblType()
+  {
+    return EnsemblSeqType.PROTEIN;
+  }
+
+  /**
+   * Returns false, as this fetcher does not retrieve DNA sequences.
+   */
+  @Override
+  public boolean isDnaCoding()
+  {
+    return false;
+  }
+
+  /**
+   * Test query is to the protein translation of transcript ENST00000288602
+   */
+  @Override
+  public String getTestQuery()
+  {
+    return "ENSP00000288602";
+  }
+
+  /**
+   * Overrides base class method to do nothing - genomic features are not
+   * applicable to the protein product sequence
+   */
+  @Override
+  protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
+  {
+  }
+
+  @Override
+  protected EnsemblFeatureType[] getFeaturesToFetch()
+  {
+    // not applicable - can't fetch genomic features for a protein sequence
+    return null;
+  }
+
+  @Override
+  protected boolean identifiesSequence(SequenceFeature sf, String accId)
+  {
+    // not applicable - protein sequence is not a 'subset' of genomic sequence
+    return false;
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblRestClient.java b/src/jalview/ext/ensembl/EnsemblRestClient.java
new file mode 100644 (file)
index 0000000..f81bce2
--- /dev/null
@@ -0,0 +1,223 @@
+package jalview.ext.ensembl;
+
+import jalview.io.FileParse;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+
+import javax.ws.rs.HttpMethod;
+
+/**
+ * Base class for Ensembl REST service clients
+ * 
+ * @author gmcarstairs
+ */
+abstract class EnsemblRestClient extends EnsemblSequenceFetcher
+{
+  protected final static String ENSEMBL_REST = "http://rest.ensembl.org";
+
+  protected static final String SEQUENCE_ID_URL = ENSEMBL_REST
+          + "/sequence/id";
+
+  // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+  private static final String PING_URL = "http://rest.ensembl.org/info/ping.json";
+
+  private final static long RETEST_INTERVAL = 10000L; // 10 seconds
+
+  private static boolean ensemblRestAvailable = false;
+
+  private static long lastCheck = -1;
+
+  protected volatile boolean inProgress = false;
+
+  @Override
+  public boolean queryInProgress()
+  {
+    return inProgress;
+  }
+
+  @Override
+  public StringBuffer getRawRecords()
+  {
+    return null;
+  }
+
+  /**
+   * Returns the URL for the client http request
+   * 
+   * @param ids
+   * @return
+   * @throws MalformedURLException
+   */
+  protected abstract URL getUrl(List<String> ids)
+          throws MalformedURLException;
+
+  /**
+   * Returns true if client uses GET method, false if it uses POST
+   * 
+   * @return
+   */
+  protected abstract boolean useGetRequest();
+
+  /**
+   * Return the desired value for the Content-Type request header
+   * 
+   * @param multipleIds
+   * 
+   * @return
+   * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
+   */
+  protected abstract String getRequestMimeType(boolean multipleIds);
+
+  /**
+   * Return the desired value for the Accept request header
+   * 
+   * @return
+   * @see https://github.com/Ensembl/ensembl-rest/wiki/HTTP-Headers
+   */
+  protected abstract String getResponseMimeType();
+
+  /**
+   * Tries to connect to Ensembl's REST 'ping' endpoint, and returns true if
+   * successful, else false
+   * 
+   * @return
+   */
+  private boolean checkEnsembl()
+  {
+    try
+    {
+      URL ping = new URL(PING_URL);
+      HttpURLConnection conn = (HttpURLConnection) ping.openConnection();
+      int rc = conn.getResponseCode();
+      conn.disconnect();
+      if (rc >= 200 && rc < 300)
+      {
+        return true;
+      }
+    } catch (Throwable t)
+    {
+      System.err.println("Error connecting to " + PING_URL + ": "
+              + t.getMessage());
+    }
+    return false;
+  }
+
+  /**
+   * returns a reader to a Fasta response from the Ensembl sequence endpoint
+   * 
+   * @param ids
+   * @return
+   * @throws IOException
+   */
+  protected FileParse getSequenceReader(List<String> ids)
+          throws IOException
+  {
+    URL url = getUrl(ids);
+  
+    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
+  
+    /*
+     * POST method allows multiple queries in one request; it is supported for
+     * sequence queries, but not for overlap
+     */
+    boolean multipleIds = ids.size() > 1;// useGetRequest();
+    connection.setRequestMethod(multipleIds ? HttpMethod.POST
+            : HttpMethod.GET);
+    connection.setRequestProperty("Content-Type",
+            getRequestMimeType(multipleIds));
+    connection.setRequestProperty("Accept", getResponseMimeType());
+
+    connection.setUseCaches(false);
+    connection.setDoInput(true);
+    connection.setDoOutput(multipleIds);
+
+    if (multipleIds)
+    {
+      writePostBody(connection, ids);
+    }
+  
+    InputStream response = connection.getInputStream();
+    int responseCode = connection.getResponseCode();
+  
+    if (responseCode != 200)
+    {
+      /*
+       * note: a GET request for an invalid id returns an error code e.g. 415
+       * but POST request returns 200 and an empty Fasta response 
+       */
+      throw new RuntimeException(
+              "Response code was not 200. Detected response was "
+                      + responseCode);
+    }
+  
+    BufferedReader reader = null;
+    reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
+    FileParse fp = new FileParse(reader, url.toString(), "HTTP_POST");
+    return fp;
+  }
+
+  /**
+   * Rechecks if Ensembl is responding, unless the last check was successful and
+   * the retest interval has not yet elapsed. Returns true if Ensembl is up,
+   * else false.
+   * 
+   * @return
+   */
+  protected boolean isEnsemblAvailable()
+  {
+    long now = System.currentTimeMillis();
+    boolean retest = now - lastCheck > RETEST_INTERVAL;
+    if (ensemblRestAvailable && !retest)
+    {
+      return true;
+    }
+    ensemblRestAvailable = checkEnsembl();
+    lastCheck = now;
+    return ensemblRestAvailable;
+  }
+
+  /**
+   * Constructs, writes and flushes the POST body of the request, containing the
+   * query ids in JSON format
+   * 
+   * @param connection
+   * @param ids
+   * @throws IOException
+   */
+  protected void writePostBody(HttpURLConnection connection,
+          List<String> ids) throws IOException
+  {
+    boolean first;
+    StringBuilder postBody = new StringBuilder(64);
+    postBody.append("{\"ids\":[");
+    first = true;
+    for (String id : ids)
+    {
+      if (!first)
+      {
+        postBody.append(",");
+      }
+      first = false;
+      postBody.append("\"");
+      postBody.append(id.trim());
+      postBody.append("\"");
+    }
+    postBody.append("]}");
+    byte[] thepostbody = postBody.toString().getBytes();
+    connection.setRequestProperty("Content-Length",
+            Integer.toString(thepostbody.length));
+    DataOutputStream wr = new DataOutputStream(connection.getOutputStream());
+    wr.write(thepostbody);
+    wr.flush();
+    wr.close();
+  }
+
+}
diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
new file mode 100644 (file)
index 0000000..fb0b01c
--- /dev/null
@@ -0,0 +1,1064 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.exceptions.JalviewException;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+import jalview.io.gff.SequenceOntology;
+import jalview.schemes.ResidueProperties;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+import jalview.util.StringUtils;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+/**
+ * Base class for Ensembl sequence fetchers
+ * 
+ * @author gmcarstairs
+ */
+public abstract class EnsemblSeqProxy extends EnsemblRestClient
+{
+  // feature attribute key, read from sequence_variant features when
+  // transferring them to make an additional 'consequence' feature
+  protected static final String CONSEQUENCE_TYPE = "consequence_type";
+
+  // feature attribute key for the identifier of a feature's parent, which
+  // may carry a prefix such as "gene:" or "transcript:"
+  protected static final String PARENT = "Parent";
+
+  // feature attribute key for the feature's own identifier
+  // (e.g. "transcript:" followed by a transcript id)
+  protected static final String ID = "ID";
+
+  /*
+   * this needs special handling, as it isA sequence_variant in the
+   * Sequence Ontology, but behaves in Ensembl as if it isA transcript
+   */
+  protected static final String NMD_VARIANT = "NMD_transcript_variant";
+
+  // presumably the feature attribute key for a feature's name - not used in
+  // the code visible here; TODO confirm against its users
+  protected static final String NAME = "Name";
+
+  public enum EnsemblSeqType
+  {
+    /**
+     * type=genomic for the full dna including introns
+     */
+    GENOMIC("genomic"),
+
+    /**
+     * type=cdna for transcribed dna including UTRs
+     */
+    CDNA("cdna"),
+
+    /**
+     * type=cds for coding dna excluding UTRs
+     */
+    CDS("cds"),
+
+    /**
+     * type=protein for the peptide product sequence
+     */
+    PROTEIN("protein");
+
+    /*
+     * the value of the 'type' parameter to fetch this version of 
+     * an Ensembl sequence
+     */
+    private String type;
+
+    EnsemblSeqType(String t)
+    {
+      type = t;
+    }
+
+    public String getType()
+    {
+      return type;
+    }
+
+  }
+
+  /**
+   * A comparator to sort ranges into ascending start position order
+   */
+  private class RangeSorter implements Comparator<int[]>
+  {
+    boolean forwards;
+
+    RangeSorter(boolean forward)
+    {
+      forwards = forward;
+    }
+
+    @Override
+    public int compare(int[] o1, int[] o2)
+    {
+      return (forwards ? 1 : -1) * Integer.compare(o1[0], o2[0]);
+    }
+
+  }
+
+  /**
+   * Default constructor; performs no initialisation of its own
+   */
+  public EnsemblSeqProxy()
+  {
+  }
+
+  /**
+   * Makes the sequence queries to Ensembl's REST service and returns an
+   * alignment consisting of the returned sequences.
+   */
+  @Override
+  public AlignmentI getSequenceRecords(String query) throws Exception
+  {
+    long now = System.currentTimeMillis();
+    // TODO use a String... query vararg instead?
+
+    // danger: accession separator used as a regex here, a string elsewhere
+    // in this case it is ok (it is just a space), but (e.g.) '\' would not be
+    List<String> allIds = Arrays.asList(query
+            .split(getAccessionSeparator()));
+    AlignmentI alignment = null;
+    inProgress = true;
+
+    /*
+     * execute queries, if necessary in batches of the
+     * maximum allowed number of ids
+     */
+    int maxQueryCount = getMaximumQueryCount();
+    for (int v = 0, vSize = allIds.size(); v < vSize; v += maxQueryCount)
+    {
+      int p = Math.min(vSize, v + maxQueryCount);
+      List<String> ids = allIds.subList(v, p);
+      try
+      {
+        alignment = fetchSequences(ids, alignment);
+      } catch (Throwable r)
+      {
+        inProgress = false;
+        String msg = "Aborting ID retrieval after " + v
+                + " chunks. Unexpected problem (" + r.getLocalizedMessage()
+                + ")";
+        System.err.println(msg);
+        if (alignment != null)
+        {
+          break; // return what we got
+        }
+        else
+        {
+          throw new JalviewException(msg, r);
+        }
+      }
+    }
+
+    /*
+     * fetch and transfer genomic sequence features,
+     * fetch protein product and add as cross-reference
+     */
+    for (String accId : allIds)
+    {
+      addFeaturesAndProduct(accId, alignment);
+    }
+
+    inProgress = false;
+    System.out.println(getClass().getName() + " took "
+            + (System.currentTimeMillis() - now) + "ms to fetch");
+    return alignment;
+  }
+
+  /**
+   * Uses an EnsemblOverlap client to fetch a 'dummy' genomic sequence
+   * carrying the features wanted for the given accession id, and transfers
+   * those features to the sequence of that name in the alignment. If any
+   * features were transferred, goes on to fetch and map the protein product
+   * (see addProteinProduct). Does nothing if the alignment is null or no
+   * genomic sequence is returned. An IOException is reported on standard
+   * error and otherwise ignored.
+   * 
+   * @param accId
+   * @param alignment
+   */
+  protected void addFeaturesAndProduct(String accId, AlignmentI alignment)
+  {
+    if (alignment == null)
+    {
+      return;
+    }
+
+    try
+    {
+      /*
+       * get 'dummy' genomic sequence with the features of interest
+       */
+      EnsemblOverlap overlapClient = new EnsemblOverlap();
+      EnsemblFeatureType[] wanted = getFeaturesToFetch();
+      AlignmentI geneFeatures = overlapClient.getSequenceRecords(accId,
+              wanted);
+      if (geneFeatures.getHeight() <= 0)
+      {
+        return;
+      }
+      SequenceI genomicSequence = geneFeatures.getSequenceAt(0);
+      if (genomicSequence == null)
+      {
+        return;
+      }
+
+      /*
+       * copy features to the query sequence; if that succeeds, fetch and
+       * map the protein product and add it as a cross-reference of the
+       * retrieved sequence
+       */
+      SequenceI querySeq = alignment.findName(accId);
+      boolean featuresAdded = transferFeatures(accId, genomicSequence,
+              querySeq);
+      if (featuresAdded)
+      {
+        addProteinProduct(querySeq);
+      }
+    } catch (IOException e)
+    {
+      System.err.println("Error transferring Ensembl features: "
+              + e.getMessage());
+    }
+  }
+
+  /**
+   * Returns those sequence feature types to fetch from Ensembl. We may want
+   * features either because they are of interest to the user, or as means to
+   * identify the locations of the sequence on the genomic sequence (CDS
+   * features identify CDS, exon features identify cDNA etc).
+   * 
+   * @return the feature types to request; an implementation for which
+   *         genomic features do not apply may return null
+   */
+  protected abstract EnsemblFeatureType[] getFeaturesToFetch();
+
+  /**
+   * Fetches the protein product for the given sequence (queried by the
+   * sequence's name), and if a mapping from the sequence's CDS features to
+   * the protein can be made, adds a mapped database cross-reference to the
+   * sequence's dataset sequence, and computes features for the protein.
+   * Any exception is reported on standard error and otherwise ignored.
+   * 
+   * @param querySeq
+   */
+  protected void addProteinProduct(SequenceI querySeq)
+  {
+    String accId = querySeq.getName();
+    try
+    {
+      AlignmentI protein = new EnsemblProtein().getSequenceRecords(accId);
+      boolean nothingFetched = protein == null
+              || protein.getHeight() == 0;
+      if (nothingFetched)
+      {
+        System.out.println("Failed to retrieve protein for " + accId);
+        return;
+      }
+      SequenceI proteinSeq = protein.getSequenceAt(0);
+
+      /*
+       * mappings are made between dataset sequences, so ensure they exist
+       */
+      proteinSeq.createDatasetSequence();
+      querySeq.createDatasetSequence();
+
+      MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
+      if (mapList == null)
+      {
+        return;
+      }
+
+      Mapping map = new Mapping(proteinSeq.getDatasetSequence(), mapList);
+      DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
+              accId, map);
+      querySeq.getDatasetSequence().addDBRef(dbr);
+
+      /*
+       * derive peptide variants from dna variants, and add them as
+       * sequence features on the protein sequence
+       */
+      computeProteinFeatures(querySeq, proteinSeq, mapList);
+    } catch (Exception e)
+    {
+      String message = String.format(
+              "Error retrieving protein for %s: %s", accId,
+              e.getMessage());
+      System.err.println(message);
+    }
+  }
+
+  /**
+   * Returns a mapping from dna to protein by inspecting sequence features of
+   * type "CDS" (or a sub-type of CDS) on the dna. Returns null if the dna has
+   * no features, or if the total length of the CDS ranges, in codons, is
+   * neither the mapped protein length nor one more than it (allowing for a
+   * stop codon).
+   * <p>
+   * If the first CDS feature has phase > 0 (5' incomplete), the mapped dna
+   * starts at the next whole codon; if the protein then starts with 'X', that
+   * residue is excluded, so that the protein is mapped from its second
+   * residue up to and including its last.
+   * 
+   * @param dnaSeq
+   * @param proteinSeq
+   * @return
+   */
+  protected MapList mapCdsToProtein(SequenceI dnaSeq, SequenceI proteinSeq)
+  {
+    SequenceFeature[] sfs = dnaSeq.getSequenceFeatures();
+    if (sfs == null)
+    {
+      return null;
+    }
+
+    List<int[]> ranges = new ArrayList<int[]>(50);
+    SequenceOntology so = SequenceOntology.getInstance();
+
+    int mappedDnaLength = 0;
+
+    /*
+     * Map CDS columns of dna to peptide. No need to worry about reverse strand
+     * dna here since the retrieved sequence is as transcribed (reverse
+     * complement for reverse strand), i.e in the same sense as the peptide. 
+     */
+    boolean fivePrimeIncomplete = false;
+    for (SequenceFeature sf : sfs)
+    {
+      /*
+       * process a CDS feature (or a sub-type of CDS)
+       */
+      if (so.isA(sf.getType(), SequenceOntology.CDS))
+      {
+        int phase = 0;
+        try
+        {
+          phase = Integer.parseInt(sf.getPhase());
+        } catch (NumberFormatException e)
+        {
+          // missing or non-numeric phase is treated as phase 0
+        }
+        /*
+         * phase > 0 on first codon means 5' incomplete - skip to the start
+         * of the next codon; example ENST00000496384
+         */
+        int begin = sf.getBegin();
+        int end = sf.getEnd();
+        if (ranges.isEmpty() && phase > 0)
+        {
+          fivePrimeIncomplete = true;
+          begin += phase;
+          if (begin > end)
+          {
+            continue; // shouldn't happen?
+          }
+        }
+        ranges.add(new int[] { begin, end });
+        mappedDnaLength += Math.abs(end - begin) + 1;
+      }
+    }
+
+    /*
+     * keep the protein end position separate from the count of mapped
+     * residues: skipping a leading 'X' moves the start on and reduces the
+     * count, but must not move the end position
+     */
+    int proteinLength = proteinSeq.getLength();
+    int proteinStart = 1;
+    int proteinEnd = proteinLength;
+    if (fivePrimeIncomplete && proteinLength > 0
+            && proteinSeq.getCharAt(0) == 'X')
+    {
+      proteinStart = 2;
+      proteinLength--;
+    }
+    List<int[]> proteinRange = new ArrayList<int[]>();
+    proteinRange.add(new int[] { proteinStart, proteinEnd });
+
+    /*
+     * dna length should map to protein (or protein plus stop codon)
+     */
+    int codesForResidues = mappedDnaLength / 3;
+    if (codesForResidues == proteinLength
+            || codesForResidues == (proteinLength + 1))
+    {
+      return new MapList(ranges, proteinRange, 3, 1);
+    }
+    return null;
+  }
+
+  /**
+   * Fetches sequences (as Fasta) for the list of accession ids and adds them
+   * to the alignment, or makes a new alignment of them if the one supplied is
+   * null. A sequence with no description is given the database name as its
+   * description. A sequence whose name is one of the ids queried (or would be
+   * with "ENSP" replaced by "ENST") gets an ENSEMBL database reference.
+   * 
+   * @param ids
+   * @param alignment
+   * @return the extended (or created) alignment
+   * @throws JalviewException
+   *           if the Ensembl REST service is not available
+   * @throws IOException
+   *           if the response is a single, empty sequence
+   */
+  protected AlignmentI fetchSequences(List<String> ids, AlignmentI alignment)
+          throws JalviewException, IOException
+  {
+    if (!isEnsemblAvailable())
+    {
+      inProgress = false;
+      throw new JalviewException("ENSEMBL Rest API not available.");
+    }
+
+    FastaFile fasta = new FastaFile(getSequenceReader(ids));
+    if (fasta.hasWarningMessage())
+    {
+      String warning = String.format(
+              "Warning when retrieving %d ids %s\n%s", ids.size(),
+              ids.toString(), fasta.getWarningMessage());
+      System.out.println(warning);
+    }
+    else if (fasta.getSeqs().size() != ids.size())
+    {
+      String shortfall = String.format(
+              "Only retrieved %d sequences for %d query strings", fasta
+                      .getSeqs().size(), ids.size());
+      System.out.println(shortfall);
+    }
+
+    /*
+     * POST request returns an empty FASTA file e.g. for invalid id
+     */
+    boolean singleEmptySequence = fasta.getSeqs().size() == 1
+            && fasta.getSeqs().get(0).getLength() == 0;
+    if (singleEmptySequence)
+    {
+      throw new IOException("No data returned for " + ids);
+    }
+
+    if (fasta.getSeqs().size() <= 0)
+    {
+      // nothing to add
+      return alignment;
+    }
+
+    AlignmentI fetched = new Alignment(fasta.getSeqsAsArray());
+    for (SequenceI seq : fetched.getSequences())
+    {
+      if (seq.getDescription() == null)
+      {
+        seq.setDescription(getDbName());
+      }
+      String name = seq.getName();
+      boolean wasQueried = ids.contains(name)
+              || ids.contains(name.replace("ENSP", "ENST"));
+      if (wasQueried)
+      {
+        DBRefUtils.parseToDbRef(seq, DBRefSource.ENSEMBL, "0", name);
+      }
+    }
+
+    if (alignment == null)
+    {
+      return fetched;
+    }
+    alignment.append(fetched);
+    return alignment;
+  }
+
+  /**
+   * Builds the URL for the sequence/id REST call. One id is placed in the URL
+   * path; with any other number of ids the path holds no id (several ids are
+   * sent in the POST body instead). The sequence type wanted is added as the
+   * 'type' parameter.
+   * 
+   * @param ids
+   * @return
+   * @throws MalformedURLException
+   */
+  @Override
+  protected URL getUrl(List<String> ids) throws MalformedURLException
+  {
+    StringBuilder sb = new StringBuilder(128);
+    sb.append(SEQUENCE_ID_URL);
+    boolean singleId = ids.size() == 1;
+    if (singleId)
+    {
+      sb.append("/");
+      sb.append(ids.get(0));
+    }
+    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
+    sb.append("?type=");
+    sb.append(getSourceEnsemblType().getType());
+    sb.append("&Accept=text/x-fasta");
+
+    return new URL(sb.toString());
+  }
+
+  /**
+   * Returns 50, the batch size used when splitting a query with many ids into
+   * several requests. A sequence/id POST request currently allows up to 50
+   * queries.
+   * 
+   * @see http://rest.ensembl.org/documentation/info/sequence_id_post
+   */
+  @Override
+  public int getMaximumQueryCount()
+  {
+    return 50;
+  }
+
+  /**
+   * Answers false, declaring this client as one that uses POST rather than
+   * GET. NOTE(review): getSequenceReader in EnsemblRestClient appears to
+   * choose the HTTP method from the number of ids rather than from this
+   * value - confirm whether this method is still consulted anywhere.
+   */
+  @Override
+  protected boolean useGetRequest()
+  {
+    return false;
+  }
+
+  /**
+   * Returns the Content-Type for the request: JSON when several ids are
+   * being sent (in the request body), otherwise Fasta
+   */
+  @Override
+  protected String getRequestMimeType(boolean multipleIds)
+  {
+    return multipleIds ? "application/json" : "text/x-fasta";
+  }
+
+  /**
+   * Returns the MIME type for Fasta, as the response format wanted
+   */
+  @Override
+  protected String getResponseMimeType()
+  {
+    return "text/x-fasta";
+  }
+
+  /**
+   * Returns the form of sequence (genomic, cdna, cds or protein) which this
+   * source retrieves; its type value is sent as the 'type' parameter of the
+   * sequence request
+   * 
+   * @return the configured sequence return type for this source
+   */
+  protected abstract EnsemblSeqType getSourceEnsemblType();
+
+  /**
+   * Returns a mapping from the [start, end] genomic ranges corresponding to
+   * the sequence being retrieved, to a single range of the same total length
+   * that begins at the given start position.
+   * 
+   * The correspondence between the frames of reference is made by locating
+   * those features on the genomic sequence which identify the retrieved
+   * sequence. Specifically
+   * <ul>
+   * <li>genomic sequence is identified by "transcript" features with
+   * ID=transcript:transcriptId</li>
+   * <li>cdna sequence is identified by "exon" features with
+   * Parent=transcript:transcriptId</li>
+   * <li>cds sequence is identified by "CDS" features with
+   * Parent=transcript:transcriptId</li>
+   * </ul>
+   * 
+   * The genomic ranges are sorted to run forwards (for positive strand) or
+   * backwards (for negative strand). Aborts and returns null if features of
+   * differing strand are found (this should not normally happen). Also
+   * returns null if the source sequence has no features, or none of them
+   * identifies the sequence.
+   * 
+   * @param sourceSequence
+   * @param accId
+   * @param start
+   *          the start position of the sequence we are mapping to
+   * @return
+   */
+  protected MapList getGenomicRanges(SequenceI sourceSequence,
+          String accId, int start)
+  {
+    SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
+    if (sfs == null)
+    {
+      return null;
+    }
+
+    /*
+     * generously initial size for number of cds regions
+     * (worst case titin Q8WZ42 has c. 313 exons)
+     */
+    List<int[]> regions = new ArrayList<int[]>(100);
+    int mappedLength = 0;
+    int direction = 1; // forward
+    boolean directionSet = false;
+
+    for (SequenceFeature sf : sfs)
+    {
+      /*
+       * accept the target feature type or a specialisation of it
+       * (e.g. coding_exon for exon)
+       */
+      if (!identifiesSequence(sf, accId))
+      {
+        continue;
+      }
+
+      int strand = sf.getStrand();
+      if (directionSet && strand != direction)
+      {
+        // abort - mix of forward and backward
+        System.err.println("Error: forward and backward strand for "
+                + accId);
+        return null;
+      }
+      direction = strand;
+      directionSet = true;
+
+      /*
+       * add to CDS ranges, semi-sorted forwards/backwards
+       */
+      if (strand < 0)
+      {
+        regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
+      }
+      else
+      {
+        regions.add(new int[] { sf.getBegin(), sf.getEnd() });
+      }
+
+      // length of the range is |end - begin| + 1, whichever way it runs
+      mappedLength += Math.abs(sf.getEnd() - sf.getBegin()) + 1;
+
+      if (!isSpliceable())
+      {
+        /*
+         * 'gene' sequence is contiguous so we can stop as soon as its
+         * identifying feature has been found
+         */
+        break;
+      }
+    }
+
+    if (regions.isEmpty())
+    {
+      System.out.println("Failed to identify target sequence for " + accId
+              + " from genomic features");
+      return null;
+    }
+
+    /*
+     * a final sort is needed since Ensembl returns CDS sorted within source
+     * (havana / ensembl_havana); sort descending only for negative strand,
+     * the same test as decided the orientation of the ranges above
+     */
+    Collections.sort(regions, new RangeSorter(direction >= 0));
+
+    List<int[]> to = new ArrayList<int[]>();
+    to.add(new int[] { start, start + mappedLength - 1 });
+
+    return new MapList(regions, to, 1, 1);
+  }
+
+  /**
+   * Answers true if the sequence being retrieved may occupy discontiguous
+   * regions on the genomic sequence. This default answers true; if false is
+   * answered instead, the search for identifying features stops at the first
+   * one found.
+   */
+  protected boolean isSpliceable()
+  {
+    return true;
+  }
+
+  /**
+   * Returns true if the sequence feature marks positions of the genomic
+   * sequence which are within the sequence being retrieved. For example, an
+   * 'exon' feature whose parent is the target transcript marks the cdna
+   * positions of the transcript. The features for which true is returned are
+   * the ones used to build the mapping from genomic positions to the
+   * retrieved sequence.
+   * 
+   * @param sf
+   *          a feature on the genomic sequence
+   * @param accId
+   *          the accession id of the sequence being retrieved
+   * @return
+   */
+  protected abstract boolean identifiesSequence(SequenceFeature sf,
+          String accId);
+
+  /**
+   * Adds a copy of the sequence feature to the target sequence, with start
+   * and end positions converted by the mapping. Nothing is added if the
+   * mapping finds no range for the feature (it does not overlap the target
+   * sequence). For a sequence variant feature which has a consequence_type
+   * attribute, a second feature of type "consequence" is added at the same
+   * position, with that attribute value as its description.
+   * 
+   * @param sf
+   * @param targetSequence
+   * @param mapping
+   *          mapping from the sequence feature's coordinates to the target
+   *          sequence
+   */
+  protected void transferFeature(SequenceFeature sf,
+          SequenceI targetSequence, MapList mapping)
+  {
+    int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd());
+    if (mappedRange == null)
+    {
+      // no overlap with the target sequence
+      return;
+    }
+
+    int from = Math.min(mappedRange[0], mappedRange[1]);
+    int to = Math.max(mappedRange[0], mappedRange[1]);
+    SequenceFeature copy = new SequenceFeature(sf);
+    copy.setBegin(from);
+    copy.setEnd(to);
+    targetSequence.addSequenceFeature(copy);
+
+    /*
+     * a sequence_variant may also give rise to a 'consequence' feature
+     */
+    if (!SequenceOntology.getInstance().isSequenceVariant(sf.getType()))
+    {
+      return;
+    }
+    String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
+    if (consequence != null)
+    {
+      SequenceFeature consequenceFeature = new SequenceFeature(
+              "consequence", consequence, copy.getBegin(), copy.getEnd(),
+              0f, null);
+      targetSequence.addSequenceFeature(consequenceFeature);
+    }
+  }
+
+  /**
+   * Transfers features from sourceSequence to targetSequence, using a mapping
+   * derived from those source features which identify the sequence for the
+   * given accession id. Answers false if either sequence is null, or if no
+   * mapping could be derived.
+   * 
+   * @param accessionId
+   * @param sourceSequence
+   * @param targetSequence
+   * @return true if any features were transferred, else false
+   */
+  protected boolean transferFeatures(String accessionId,
+          SequenceI sourceSequence, SequenceI targetSequence)
+  {
+    boolean bothPresent = sourceSequence != null
+            && targetSequence != null;
+    if (!bothPresent)
+    {
+      return false;
+    }
+
+    SequenceFeature[] sourceFeatures = sourceSequence
+            .getSequenceFeatures();
+    int targetStart = targetSequence.getStart();
+    MapList mapping = getGenomicRanges(sourceSequence, accessionId,
+            targetStart);
+
+    return mapping != null
+            && transferFeatures(sourceFeatures, targetSequence, mapping,
+                    accessionId);
+  }
+
+  /**
+   * Transfer features to the target sequence. The start/end positions are
+   * converted using the mapping. Each feature is first offered to
+   * retainFeature (with parentId) and is skipped if that answers false.
+   * <p>
+   * Note that the supplied array is sorted in place by start position
+   * (descending for a reverse strand mapping) before features are processed.
+   * 
+   * @param features
+   *          the features to transfer; may be null or empty, in which case
+   *          nothing is done
+   * @param targetSequence
+   *          the sequence to add the mapped features to
+   * @param mapping
+   *          the mapping used to convert feature positions
+   * @param parentId
+   *          identifier passed to retainFeature
+   * @return true if at least one feature was retained and passed on for
+   *         transfer, else false
+   */
+  protected boolean transferFeatures(SequenceFeature[] features,
+          SequenceI targetSequence, MapList mapping, String parentId)
+  {
+    /*
+     * a sequence with no features supplies a null array; there is
+     * nothing to sort or transfer in that case
+     */
+    if (features == null || features.length == 0)
+    {
+      return false;
+    }
+
+    final boolean forwardStrand = mapping.isFromForwardStrand();
+
+    /*
+     * sort features by start position (descending if reverse strand) 
+     * before transferring (in forwards order) to the target sequence
+     */
+    Arrays.sort(features, new Comparator<SequenceFeature>()
+    {
+      @Override
+      public int compare(SequenceFeature o1, SequenceFeature o2)
+      {
+        int c = Integer.compare(o1.getBegin(), o2.getBegin());
+        return forwardStrand ? c : -c;
+      }
+    });
+
+    boolean transferred = false;
+    for (SequenceFeature sf : features)
+    {
+      if (retainFeature(sf, parentId))
+      {
+        transferFeature(sf, targetSequence, mapping);
+        transferred = true;
+      }
+    }
+    return transferred;
+  }
+
+  /**
+   * Answers true if the feature type is one we want to keep for the sequence.
+   * Some features are only retrieved in order to identify the sequence range,
+   * and may then be discarded as redundant information (e.g. "CDS" feature for
+   * a CDS sequence).
+   * <p>
+   * This base implementation ignores both arguments and always answers true;
+   * it is intended to be overridden where filtering is required.
+   * 
+   * @param sf
+   *          the feature being considered for transfer
+   * @param accessionId
+   *          the identifier supplied by the caller of transferFeatures
+   * @return true if the feature should be transferred
+   */
+  @SuppressWarnings("unused")
+  protected boolean retainFeature(SequenceFeature sf, String accessionId)
+  {
+    return true; // override as required
+  }
+
+  /**
+   * Tests whether a feature could belong to the given accession. A feature
+   * with no Parent attribute is accepted; a feature with a Parent is accepted
+   * only if the Parent value contains the identifier.
+   * 
+   * @param sf
+   *          the feature to test
+   * @param identifier
+   *          the accession id to look for in the feature's Parent
+   * @return false if the feature has a Parent that does not contain the
+   *         identifier, otherwise true
+   */
+  protected boolean featureMayBelong(SequenceFeature sf, String identifier)
+  {
+    final String parentValue = (String) sf.getValue(PARENT);
+
+    /*
+     * 'contains' rather than 'equals' allows for a prefix on the parent
+     * value such as "gene:" or "transcript:"; a mismatch means this genomic
+     * feature belongs to a different transcript
+     */
+    return parentValue == null || parentValue.contains(identifier);
+  }
+
+  /**
+   * Returns a description of this source, built from the type of the Ensembl
+   * source (as given by getSourceEnsemblType).
+   */
+  @Override
+  public String getDescription()
+  {
+    final String suffix = " sequence with variant features";
+    return "Ensembl " + getSourceEnsemblType().getType() + suffix;
+  }
+
+  /**
+   * Returns a (possibly empty) list of features on the sequence which have the
+   * specified sequence ontology type (or a sub-type of it), and the given
+   * identifier as parent. Features which have no Parent attribute are not
+   * included.
+   * 
+   * @param sequence
+   *          the sequence whose features are searched
+   * @param type
+   *          the sequence ontology term that features must match (via isA)
+   * @param parentId
+   *          the value the feature's Parent attribute must equal
+   * @return the matching features, in the order held on the sequence
+   */
+  protected List<SequenceFeature> findFeatures(SequenceI sequence,
+          String type, String parentId)
+  {
+    List<SequenceFeature> result = new ArrayList<SequenceFeature>();
+
+    SequenceFeature[] sfs = sequence.getSequenceFeatures();
+    if (sfs == null)
+    {
+      return result;
+    }
+
+    SequenceOntology so = SequenceOntology.getInstance();
+    for (SequenceFeature sf : sfs)
+    {
+      if (so.isA(sf.getType(), type))
+      {
+        /*
+         * a feature need not have a Parent attribute; guard against null
+         * rather than throwing NullPointerException
+         */
+        String parent = (String) sf.getValue(PARENT);
+        if (parent != null && parent.equals(parentId))
+        {
+          result.add(sf);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Adds features to the peptide that are derived from the dna: exon features
+   * are mapped across (see mapExonsToProtein), and for each peptide position
+   * with dna variants, the alternative residues those variants could produce
+   * are added as a single sequence_variant feature. Both sequences are first
+   * resolved to their underlying dataset sequences.
+   * 
+   * @param dnaSeq
+   *          the dna sequence (or an aligned sequence derived from it)
+   * @param peptide
+   *          the protein sequence (or an aligned sequence derived from it)
+   * @param dnaToProtein
+   *          the mapping from dna to protein positions
+   * @return the number of variant features added to the peptide
+   */
+  static int computeProteinFeatures(SequenceI dnaSeq,
+          SequenceI peptide, MapList dnaToProtein)
+  {
+    /*
+     * work on the dataset sequences, which hold the features
+     */
+    SequenceI dnaDataset = dnaSeq;
+    while (dnaDataset.getDatasetSequence() != null)
+    {
+      dnaDataset = dnaDataset.getDatasetSequence();
+    }
+    SequenceI proteinDataset = peptide;
+    while (proteinDataset.getDatasetSequence() != null)
+    {
+      proteinDataset = proteinDataset.getDatasetSequence();
+    }
+
+    mapExonsToProtein(dnaDataset, proteinDataset, dnaToProtein);
+
+    LinkedHashMap<Integer, String[][]> variantsByPosition = buildDnaVariantsMap(
+            dnaDataset, dnaToProtein);
+
+    /*
+     * for each peptide position with codon variations, work out the
+     * alternative residues and record them as one feature
+     */
+    int added = 0;
+    for (Entry<Integer, String[][]> entry : variantsByPosition.entrySet())
+    {
+      int position = entry.getKey();
+      // getCharAt is 0-based, peptide positions are 1-based
+      String currentResidue = String.valueOf(proteinDataset
+              .getCharAt(position - 1));
+      List<String> alternatives = computePeptideVariants(entry.getValue(),
+              currentResidue);
+      if (alternatives.isEmpty())
+      {
+        continue;
+      }
+      Collections.sort(alternatives);
+      String description = StringUtils.listToDelimitedString(alternatives,
+              ", ");
+      proteinDataset.addSequenceFeature(new SequenceFeature(
+              SequenceOntology.SEQUENCE_VARIANT, description, position,
+              position, 0f, null));
+      added++;
+    }
+    return added;
+  }
+
+  /**
+   * Copies exon features from the dna sequence to the regions of the protein
+   * sequence that they map to. This allows exon boundaries to be visualised
+   * on the peptide (using 'colour by label' for the exon name). Exons which
+   * have no mapped location on the protein are skipped.
+   * 
+   * @param dnaSeq
+   *          the dna sequence holding the exon features
+   * @param peptide
+   *          the protein sequence to add features to
+   * @param dnaToProtein
+   *          the mapping from dna to protein positions
+   * @return the number of features added to the peptide
+   */
+  static int mapExonsToProtein(SequenceI dnaSeq, SequenceI peptide,
+          MapList dnaToProtein)
+  {
+    int written = 0;
+    SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
+    if (dnaFeatures != null)
+    {
+      SequenceOntology ontology = SequenceOntology.getInstance();
+      for (SequenceFeature feature : dnaFeatures)
+      {
+        if (!ontology.isA(feature.getType(), SequenceOntology.EXON))
+        {
+          continue;
+        }
+        int[] proteinRange = dnaToProtein.locateInTo(feature.getBegin(),
+                feature.getEnd());
+        if (proteinRange == null)
+        {
+          continue;
+        }
+        peptide.addSequenceFeature(new SequenceFeature(
+                SequenceOntology.EXON, feature.getDescription(),
+                proteinRange[0], proteinRange[1], 0f, null));
+        written++;
+      }
+    }
+    return written;
+  }
+
+  /**
+   * Builds a map whose key is position in the protein sequence, and value is
+   * an array (one entry per codon position) of the base values at that codon
+   * position: the current base first, followed by any variant alleles.
+   * <p>
+   * Only single-position sequence_variant features that map to the protein
+   * and carry an "alleles" value contribute. A map entry is only created once
+   * such a feature is found, so every entry returned has all three codon
+   * positions populated.
+   * 
+   * @param dnaSeq
+   *          the dna sequence holding the variant features
+   * @param dnaToProtein
+   *          the mapping from dna to protein positions
+   * @return the (possibly empty) map, in order of first occurrence of each
+   *         peptide position
+   */
+  static LinkedHashMap<Integer, String[][]> buildDnaVariantsMap(
+          SequenceI dnaSeq, MapList dnaToProtein)
+  {
+    /*
+     * map from peptide position to all variant features of the codon for it
+     * LinkedHashMap preserves the order in which positions are first added
+     */
+    LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
+    SequenceOntology so = SequenceOntology.getInstance();
+
+    SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
+    if (dnaFeatures == null)
+    {
+      return variants;
+    }
+
+    int[] lastCodon = null;
+    int lastPeptidePosition = 0;
+
+    /*
+     * build a map of codon variations for peptides
+     */
+    for (SequenceFeature sf : dnaFeatures)
+    {
+      int dnaCol = sf.getBegin();
+      if (dnaCol != sf.getEnd())
+      {
+        // not handling multi-locus variant features
+        continue;
+      }
+      if (!so.isSequenceVariant(sf.getType()))
+      {
+        continue;
+      }
+
+      int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
+      if (mapsTo == null)
+      {
+        // feature doesn't lie within coding region
+        continue;
+      }
+      int peptidePosition = mapsTo[0];
+
+      /*
+       * extract dna variants to a string array; do this before creating
+       * a map entry, so that a variant with no alleles cannot leave behind
+       * an entry whose codon positions are all null
+       */
+      String alls = (String) sf.getValue("alleles");
+      if (alls == null)
+      {
+        continue;
+      }
+      String[] alleles = alls.split(",");
+
+      /*
+       * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
+       * (reusing the last lookup if for the same peptide position)
+       */
+      int[] codon = peptidePosition == lastPeptidePosition ? lastCodon
+              : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
+                      peptidePosition, peptidePosition));
+      lastPeptidePosition = peptidePosition;
+      lastCodon = codon;
+      if (codon == null || codon.length < 3)
+      {
+        // incomplete codon - cannot be translated, so ignore the variant
+        continue;
+      }
+
+      String[][] codonVariants = variants.get(peptidePosition);
+      if (codonVariants == null)
+      {
+        codonVariants = new String[3][];
+        variants.put(peptidePosition, codonVariants);
+      }
+
+      /*
+       * save nucleotide (and this variant) for each codon position
+       */
+      for (int codonPos = 0; codonPos < 3; codonPos++)
+      {
+        String nucleotide = String.valueOf(dnaSeq
+                .getCharAt(codon[codonPos] - 1));
+        if (codon[codonPos] == dnaCol)
+        {
+          /*
+           * record current dna base and its alleles
+           */
+          String[] dnaVariants = new String[alleles.length + 1];
+          dnaVariants[0] = nucleotide;
+          System.arraycopy(alleles, 0, dnaVariants, 1, alleles.length);
+          codonVariants[codonPos] = dnaVariants;
+        }
+        else if (codonVariants[codonPos] == null)
+        {
+          /*
+           * record current dna base only 
+           * (at least until we find any variation and overwrite it)
+           */
+          codonVariants[codonPos] = new String[] { nucleotide };
+        }
+      }
+    }
+    return variants;
+  }
+
+  /**
+   * Translates every combination of the supplied base values for codon
+   * positions 1, 2 and 3, and returns the distinct translations, leaving out
+   * the current residue and any codon that has no translation. A codon that
+   * contains a gap ("-") is reported as "-".
+   * 
+   * @param codonVariants
+   *          an array of base values for codon positions 1, 2, 3
+   * @param residue
+   *          the current residue translation
+   * @return the distinct alternative translations, in order of discovery
+   */
+  static List<String> computePeptideVariants(
+          String[][] codonVariants, String residue)
+  {
+    List<String> translations = new ArrayList<String>();
+    for (String first : codonVariants[0])
+    {
+      for (String second : codonVariants[1])
+      {
+        for (String third : codonVariants[2])
+        {
+          String triplet = first + second + third;
+          // TODO: report frameshift/insertion/deletion
+          // and multiple-base variants?!
+          String translated;
+          if (triplet.contains("-"))
+          {
+            translated = "-";
+          }
+          else
+          {
+            translated = ResidueProperties.codonTranslate(triplet);
+          }
+          if (translated == null || translated.equals(residue)
+                  || translations.contains(translated))
+          {
+            continue;
+          }
+          translations.add(translated);
+        }
+      }
+    }
+    return translations;
+  }
+
+  /**
+   * Tests whether a feature type should be treated as a transcript: that is,
+   * it is either 'NMD_transcript_variant', or is 'transcript' or one of its
+   * sub-types in the Sequence Ontology. The special case is needed because
+   * NMD_transcript_variant behaves like 'transcript' in Ensembl, although
+   * strictly speaking it is a sub-type of sequence_variant.
+   * 
+   * @param featureType
+   *          the feature type to test
+   * @return true if the type is to be treated as a transcript
+   */
+  public static boolean isTranscript(String featureType)
+  {
+    if (NMD_VARIANT.equals(featureType))
+    {
+      return true;
+    }
+    return SequenceOntology.getInstance().isA(featureType,
+            SequenceOntology.TRANSCRIPT);
+  }
+}
diff --git a/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java b/src/jalview/ext/ensembl/EnsemblSequenceFetcher.java
new file mode 100644 (file)
index 0000000..f1b96e2
--- /dev/null
@@ -0,0 +1,80 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.DBRefSource;
+import jalview.ws.seqfetcher.DbSourceProxyImpl;
+
+import com.stevesoft.pat.Regex;
+
+/**
+ * A base class for Ensembl sequence fetchers. It supplies the settings that
+ * are fixed in this class for Ensembl sources (database source name,
+ * accession format, separator, tier and default test query), leaving the
+ * remaining behaviour to subclasses.
+ * 
+ * @author gmcarstairs
+ *
+ */
+public abstract class EnsemblSequenceFetcher extends DbSourceProxyImpl
+{
+  /*
+   * possible values for the 'feature' parameter of the REST overlap endpoint
+   * (see the Ensembl REST API documentation for the overlap endpoint)
+   */
+  protected enum EnsemblFeatureType
+  {
+    gene, transcript, cds, exon, repeat, simple, misc, variation,
+    somatic_variation, structural_variation, somatic_structural_variation,
+    constrained, regulatory
+  }
+
+  /**
+   * Returns the database source name for Ensembl
+   */
+  @Override
+  public String getDbSource()
+  {
+    // NB ensure Uniprot xrefs are canonicalised from "Ensembl" to "ENSEMBL"
+    return DBRefSource.ENSEMBL; // "ENSEMBL"
+  }
+
+  /**
+   * Returns a fixed placeholder version of "0"
+   */
+  @Override
+  public String getDbVersion()
+  {
+    return "0";
+  }
+
+  /**
+   * Returns the separator between accession ids in a query: a single space
+   */
+  @Override
+  public String getAccessionSeparator()
+  {
+    return " ";
+  }
+
+  /**
+   * Returns a new regular expression which matches one of the prefixes ENSP,
+   * ENST, ENSG or CCDS followed by three or more characters, each of which is
+   * a digit or '.'
+   */
+  @Override
+  public Regex getAccessionValidator()
+  {
+    return new Regex("((ENSP|ENST|ENSG|CCDS)[0-9.]{3,})");
+  }
+
+  /**
+   * Answers the result of searching the accession with the regular expression
+   * supplied by getAccessionValidator
+   * 
+   * @param accession
+   *          the identifier to check
+   */
+  @Override
+  public boolean isValidReference(String accession)
+  {
+    return getAccessionValidator().search(accession);
+  }
+
+  /**
+   * Returns the tier of this source, which is 0
+   */
+  @Override
+  public int getTier()
+  {
+    return 0;
+  }
+
+  /**
+   * Default test query is a transcript
+   */
+  @Override
+  public String getTestQuery()
+  {
+    // has CDS on reverse strand:
+    return "ENST00000288602";
+    // ENST00000461457 // forward strand
+  }
+
+  /**
+   * Answers true, as declared for all fetchers derived from this class
+   */
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
+}
diff --git a/src/jalview/ext/htsjdk/HtsContigDb.java b/src/jalview/ext/htsjdk/HtsContigDb.java
new file mode 100644 (file)
index 0000000..f3b5098
--- /dev/null
@@ -0,0 +1,210 @@
+package jalview.ext.htsjdk;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
+import htsjdk.samtools.util.StringUtil;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.File;
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A source of sequence data accessed via the HTSJDK. Wraps a reference
+ * sequence file, and allows a named sequence to be retrieved from it as a
+ * Jalview Sequence.
+ * 
+ * @author jprocter
+ *
+ */
+public class HtsContigDb
+{
+
+  private String name;
+
+  private File dbLocation;
+
+  private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null;
+
+  /**
+   * Constructs a database with the given name for the given file, and opens
+   * the file as a reference sequence source.
+   * <p>
+   * NOTE(review): if descriptor is not a regular file, name and location are
+   * left unset but initSource is still called with a null location - this
+   * presumably fails inside htsjdk; confirm whether that is intended.
+   * 
+   * @param name
+   *          a name for this database
+   * @param descriptor
+   *          the reference sequence file
+   * @throws Exception
+   *           if the reference sequence file cannot be opened
+   */
+  public HtsContigDb(String name, File descriptor) throws Exception
+  {
+    if (descriptor.isFile())
+    {
+      this.name = name;
+      dbLocation = descriptor;
+    }
+    initSource();
+  }
+
+  /**
+   * Opens the reference sequence file at dbLocation, unless already open
+   * 
+   * @throws Exception
+   */
+  private void initSource() throws Exception
+  {
+    if (refFile != null)
+    {
+      return;
+    }
+
+    refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(
+            dbLocation, true);
+    if (refFile == null || refFile.getSequenceDictionary() == null)
+    {
+      /*
+       * placeholder: no action is currently taken if the file has no 
+       * sequence dictionary; the intended fallback is disabled
+       */
+      // refFile = initSequenceDictionaryFor(dbLocation);
+    }
+
+  }
+
+
+  SAMSequenceDictionary rrefDict = null;
+
+  /**
+   * Builds a sequence dictionary for the file (held in rrefDict) and, if
+   * successful, returns the file re-opened as a reference sequence source,
+   * else null. Currently only referenced from disabled code in initSource.
+   * 
+   * @param dbLocation2
+   * @return
+   * @throws Exception
+   */
+  private ReferenceSequenceFile initSequenceDictionaryFor(File dbLocation2) throws Exception
+  {
+    rrefDict = getDictionary(dbLocation2, true);
+    if (rrefDict != null)
+    {
+      ReferenceSequenceFile rrefFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(dbLocation2, true);
+      return rrefFile;
+    }
+    return null;
+  }
+  /**
+   * code below hacked out from picard ----
+   * 
+   * picard/src/java/picard/sam/CreateSequenceDictionary.java
+   * https://github.com/
+   * broadinstitute/picard/commit/270580d3e28123496576f0b91b3433179bb5d876
+   */
+
+
+  /*
+   * The MIT License
+   * 
+   * Copyright (c) 2009 The Broad Institute
+   * 
+   * Permission is hereby granted, free of charge, to any person obtaining a
+   * copy of this software and associated documentation files (the "Software"),
+   * to deal in the Software without restriction, including without limitation
+   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+   * and/or sell copies of the Software, and to permit persons to whom the
+   * Software is furnished to do so, subject to the following conditions:
+   * 
+   * The above copyright notice and this permission notice shall be included in
+   * all copies or substantial portions of the Software.
+   * 
+   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   * DEALINGS IN THE SOFTWARE.
+   */
+  /**
+   * Reads every sequence in the given file and returns a dictionary holding
+   * one record (name, length and MD5 of the upper-cased bases) per sequence.
+   * The file opened for reading is closed before returning.
+   * 
+   * @param f
+   *          the reference sequence file to read
+   * @param truncate
+   *          passed to the htsjdk factory method when opening the file
+   * @return the dictionary of sequence records
+   * @throws Exception
+   *           if a sequence name occurs more than once, or the file cannot be
+   *           read or closed
+   */
+  SAMSequenceDictionary getDictionary(File f, boolean truncate)
+          throws Exception
+  {
+    if (md5 == null)
+    {
+      initCreateSequenceDictionary();
+    }
+    final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory
+            .getReferenceSequenceFile(f, truncate);
+    try
+    {
+      ReferenceSequence refSeq;
+      List<SAMSequenceRecord> ret = new ArrayList<SAMSequenceRecord>();
+      Set<String> sequenceNames = new HashSet<String>();
+      while ((refSeq = refSeqFile.nextSequence()) != null)
+      {
+        if (sequenceNames.contains(refSeq.getName()))
+        {
+          throw new Exception(
+                  "Sequence name appears more than once in reference: "
+                          + refSeq.getName());
+        }
+        sequenceNames.add(refSeq.getName());
+        ret.add(makeSequenceRecord(refSeq));
+      }
+      return new SAMSequenceDictionary(ret);
+    } finally
+    {
+      // release the file handle opened above, whether or not reading failed
+      refSeqFile.close();
+    }
+  }
+
+  /**
+   * Answers true if this database has both a file location and an open
+   * reference sequence file
+   * 
+   * @return
+   */
+  public boolean isValid()
+  {
+    return dbLocation != null && refFile != null;
+  }
+
+  /**
+   * Create one SAMSequenceRecord from a single fasta sequence. Note that the
+   * byte array returned by refSeq.getBases() is upper-cased in place.
+   */
+  private SAMSequenceRecord makeSequenceRecord(
+          final ReferenceSequence refSeq)
+  {
+
+    final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(),
+            refSeq.length());
+
+    // Compute MD5 of upcased bases
+    final byte[] bases = refSeq.getBases();
+    for (int i = 0; i < bases.length; ++i)
+    {
+      bases[i] = StringUtil.toUpperCase(bases[i]);
+    }
+
+    ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
+    // if (GENOME_ASSEMBLY != null) {
+    // ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
+    // }
+    // ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
+    // if (SPECIES != null) {
+    // ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
+    // }
+    return ret;
+  }
+
+  private MessageDigest md5;
+
+  /**
+   * Creates the MD5 digest used when building sequence records
+   * 
+   * @throws Exception
+   *           if the MD5 algorithm is not available
+   */
+  public void initCreateSequenceDictionary() throws Exception
+  {
+    try
+    {
+      md5 = MessageDigest.getInstance("MD5");
+    } catch (NoSuchAlgorithmException e)
+    {
+      throw new Exception("MD5 algorithm not found", e);
+    }
+  }
+
+  /**
+   * Returns the MD5 hash of the bytes as a 32 character hexadecimal string,
+   * left-padded with zeros if necessary
+   * 
+   * @param bytes
+   * @return
+   */
+  private String md5Hash(final byte[] bytes)
+  {
+    md5.reset();
+    md5.update(bytes);
+    String s = new BigInteger(1, md5.digest()).toString(16);
+    if (s.length() != 32)
+    {
+      final String zeros = "00000000000000000000000000000000";
+      s = zeros.substring(0, 32 - s.length()) + s;
+    }
+    return s;
+  }
+
+  // ///// end of hts bits.
+
+  /**
+   * Returns a new Sequence with the name and bases of the sequence with the
+   * given id in the reference file, or null if this database is not valid.
+   * <p>
+   * NOTE(review): behaviour for an id that is not in the file depends on the
+   * htsjdk implementation of getSequence - confirm.
+   * 
+   * @param id
+   *          the name of the sequence to retrieve
+   * @return
+   */
+  SequenceI getSequenceProxy(String id)
+  {
+    if (!isValid())
+    {
+      return null;
+    }
+
+    ReferenceSequence sseq = refFile.getSequence(id);
+    return new Sequence(sseq.getName(), new String(sseq.getBases()));
+  }
+}
index c615f6c..5ff7c6c 100644 (file)
@@ -131,7 +131,6 @@ import java.util.Deque;
 import java.util.Enumeration;
 import java.util.Hashtable;
 import java.util.List;
-import java.util.Set;
 import java.util.Vector;
 
 import javax.swing.JCheckBoxMenuItem;
@@ -828,6 +827,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
   public void setGUINucleotide(boolean nucleotide)
   {
     showTranslation.setVisible(nucleotide);
+    showReverse.setVisible(nucleotide);
+    showReverseComplement.setVisible(nucleotide);
     conservationMenuItem.setEnabled(!nucleotide);
     modifyConservation.setEnabled(!nucleotide);
     showGroupConservation.setEnabled(!nucleotide);
@@ -1980,7 +1981,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
           return;
         }
 
-        format = new IdentifyFile().Identify(str, "Paste");
+        format = new IdentifyFile().identify(str, "Paste");
 
       } catch (OutOfMemoryError er)
       {
@@ -4784,19 +4785,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
             // TODO 1: no mappings are set up for EMBL product
             // TODO 2: if they were, should add them to protein alignment, not
             // dna
-            Set<AlignedCodonFrame> cf = prods.getCodonFrames();
+            List<AlignedCodonFrame> cf = prods.getCodonFrames();
             for (AlignedCodonFrame acf : cf)
             {
               al.addCodonFrame(acf);
             }
-            AlignFrame naf = new AlignFrame(al, DEFAULT_WIDTH,
+            AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH,
                     DEFAULT_HEIGHT);
-            String newtitle = "" + ((dna) ? "Proteins" : "Nucleotides")
-                    + " for " + ((isRegSel) ? "selected region of " : "")
+            String newtitle = "" + (dna ? "Proteins" : "Nucleotides")
+                    + " for " + (isRegSel ? "selected region of " : "")
                     + getTitle();
-            naf.setTitle(newtitle);
+            newFrame.setTitle(newtitle);
 
-            // temporary flag until SplitFrame is released
             boolean asSplitFrame = Cache.getDefault(
                     Preferences.ENABLE_SPLIT_FRAME, true);
             if (asSplitFrame)
@@ -4810,7 +4810,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                       .getSequenceSelection();
               if (dna)
               {
-                copyAlignment = AlignmentUtils.makeExonAlignment(
+                copyAlignment = AlignmentUtils.makeCdsAlignment(
                         sequenceSelection, cf);
                 al.getCodonFrames().clear();
                 al.getCodonFrames().addAll(cf);
@@ -4827,9 +4827,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                       AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
               copyThis.setTitle(AlignFrame.this.getTitle());
               // SplitFrame with dna above, protein below
-              SplitFrame sf = new SplitFrame(dna ? copyThis : naf,
-                      dna ? naf : copyThis);
-              naf.setVisible(true);
+              boolean showSequenceFeatures = viewport
+                      .isShowSequenceFeatures();
+              newFrame.setShowSeqFeatures(showSequenceFeatures);
+              copyThis.setShowSeqFeatures(showSequenceFeatures);
+              SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
+                      dna ? newFrame : copyThis);
+              newFrame.setVisible(true);
               copyThis.setVisible(true);
               String linkedTitle = MessageManager
                       .getString("label.linked_view_title");
@@ -4837,7 +4841,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
             }
             else
             {
-              Desktop.addInternalFrame(naf, newtitle, DEFAULT_WIDTH,
+              Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
                       DEFAULT_HEIGHT);
             }
           }
@@ -5102,7 +5106,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               String type = null;
               try
               {
-                type = new IdentifyFile().Identify(file, protocol);
+                type = new IdentifyFile().identify(file, protocol);
               } catch (Exception ex)
               {
                 type = null;
@@ -5201,7 +5205,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
 
   /**
    * Attempt to load a "dropped" file or URL string: First by testing whether
-   * it's and Annotation file, then a JNet file, and finally a features file. If
+   * it's an Annotation file, then a JNet file, and finally a features file. If
    * all are false then the user may have dropped an alignment file onto this
    * AlignFrame.
    * 
@@ -5215,7 +5219,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
     {
       if (protocol == null)
       {
-        protocol = jalview.io.FormatAdapter.checkProtocol(file);
+        protocol = FormatAdapter.checkProtocol(file);
       }
       // if the file isn't identified, or not positively identified as some
       // other filetype (PFAM is default unidentified alignment file type) then
@@ -5276,7 +5280,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
           // try to parse it as a features file
           if (format == null)
           {
-            format = new IdentifyFile().Identify(file, protocol);
+            format = new IdentifyFile().identify(file, protocol);
           }
           if (format.equalsIgnoreCase("JnetFile"))
           {
@@ -5292,42 +5296,17 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
             viewport.setColumnSelection(cs);
             isAnnotation = true;
           }
-          else
+          else if (IdentifyFile.FeaturesFile.equals(format))
           {
-            /*
-             * if (format.equalsIgnoreCase("PDB")) {
-             * 
-             * String pdbfn = ""; // try to match up filename with sequence id
-             * try { if (protocol == jalview.io.FormatAdapter.FILE) { File fl =
-             * new File(file); pdbfn = fl.getName(); } else if (protocol ==
-             * jalview.io.FormatAdapter.URL) { URL url = new URL(file); pdbfn =
-             * url.getFile(); } } catch (Exception e) { } ; if (assocSeq ==
-             * null) { SequenceIdMatcher idm = new SequenceIdMatcher(viewport
-             * .getAlignment().getSequencesArray()); if (pdbfn.length() > 0) {
-             * // attempt to find a match in the alignment SequenceI mtch =
-             * idm.findIdMatch(pdbfn); int l = 0, c = pdbfn.indexOf("."); while
-             * (mtch == null && c != -1) { while ((c = pdbfn.indexOf(".", l)) >
-             * l) { l = c; } if (l > -1) { pdbfn = pdbfn.substring(0, l); } mtch
-             * = idm.findIdMatch(pdbfn); } if (mtch != null) { // try and
-             * associate // prompt ? PDBEntry pe = new AssociatePdbFileWithSeq()
-             * .associatePdbWithSeq(file, protocol, mtch, true); if (pe != null)
-             * { System.err.println("Associated file : " + file + " with " +
-             * mtch.getDisplayId(true)); alignPanel.paintAlignment(true); } } //
-             * TODO: maybe need to load as normal otherwise return; } }
-             */
-            // try to parse it as a features file
-            boolean isGroupsFile = parseFeaturesFile(file, protocol);
-            // if it wasn't a features file then we just treat it as a general
-            // alignment file to load into the current view.
-            if (!isGroupsFile)
+            if (parseFeaturesFile(file, protocol))
             {
-              new FileLoader().LoadFile(viewport, file, protocol, format);
+              alignPanel.paintAlignment(true);
             }
+          }
             else
             {
-              alignPanel.paintAlignment(true);
+              new FileLoader().LoadFile(viewport, file, protocol, format);
             }
-          }
         }
       }
       if (isAnnotation)
@@ -5349,7 +5328,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
       } catch (Exception x)
       {
       }
-      ;
       new OOMWarning(
               "loading data "
                       + (protocol != null ? (protocol.equals(FormatAdapter.PASTE) ? "from clipboard."
@@ -6008,7 +5986,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
   {
     // TODO no longer a menu action - refactor as required
     final AlignmentI alignment = getViewport().getAlignment();
-    Set<AlignedCodonFrame> mappings = alignment.getCodonFrames();
+    List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
     if (mappings == null)
     {
       return;
@@ -6063,6 +6041,27 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
       sf.setComplementVisible(this, show);
     }
   }
+
+  /**
+   * Builds the reverse (or, if requested, the reverse complement) of the
+   * current view's visible contigs, and adds the result to the viewport. Any
+   * exception is reported by writing its message to stderr.
+   * 
+   * @param complement
+   *          passed to Dna.reverseCdna to select reverse complement
+   */
+  @Override
+  protected void showReverse_actionPerformed(boolean complement)
+  {
+    try
+    {
+      final Dna reverser = new Dna(viewport,
+              viewport.getViewAsVisibleContigs(true));
+      final AlignmentI reversed = reverser.reverseCdna(complement);
+      viewport.addAlignment(reversed, "");
+    } catch (Exception ex)
+    {
+      System.err.println(ex.getMessage());
+    }
+  }
 }
 
 class PrintThread extends Thread
index 92c6355..06dc4c4 100644 (file)
@@ -72,7 +72,6 @@ import java.awt.Rectangle;
 import java.util.ArrayList;
 import java.util.Hashtable;
 import java.util.List;
-import java.util.Set;
 import java.util.Vector;
 
 import javax.swing.JInternalFrame;
@@ -461,7 +460,7 @@ public class AlignViewport extends AlignmentViewport implements
     AlignmentI al = getAlignment();
     if (al != null)
     {
-      Set<AlignedCodonFrame> mappings = al.getCodonFrames();
+      List<AlignedCodonFrame> mappings = al.getCodonFrames();
       if (mappings != null)
       {
         StructureSelectionManager ssm = StructureSelectionManager
@@ -482,6 +481,7 @@ public class AlignViewport extends AlignmentViewport implements
    * 
    * @return DOCUMENT ME!
    */
+  @Override
   public char getGapCharacter()
   {
     return getAlignment().getGapCharacter();
@@ -506,6 +506,7 @@ public class AlignViewport extends AlignmentViewport implements
    * 
    * @return DOCUMENT ME!
    */
+  @Override
   public ColumnSelection getColumnSelection()
   {
     return colSel;
@@ -617,6 +618,7 @@ public class AlignViewport extends AlignmentViewport implements
   /**
    * Send the current selection to be broadcast to any selection listeners.
    */
+  @Override
   public void sendSelection()
   {
     jalview.structure.StructureSelectionManager
@@ -736,6 +738,7 @@ public class AlignViewport extends AlignmentViewport implements
     return seqvectors.toArray(new SequenceI[seqvectors.size()][]);
   }
 
+  @Override
   public boolean isNormaliseSequenceLogo()
   {
     return normaliseSequenceLogo;
@@ -750,6 +753,7 @@ public class AlignViewport extends AlignmentViewport implements
    * 
    * @return true if alignment characters should be displayed
    */
+  @Override
   public boolean isValidCharWidth()
   {
     return validCharWidth;
@@ -825,10 +829,10 @@ public class AlignViewport extends AlignmentViewport implements
    * may give the user the option to open a new frame, or split panel, with cDNA
    * and protein linked.
    * 
-   * @param al
+   * @param toAdd
    * @param title
    */
-  public void addAlignment(AlignmentI al, String title)
+  public void addAlignment(AlignmentI toAdd, String title)
   {
     // TODO: promote to AlignViewportI? applet CutAndPasteTransfer is different
 
@@ -841,25 +845,26 @@ public class AlignViewport extends AlignmentViewport implements
     // TODO: create undo object for this JAL-1101
 
     /*
-     * If any cDNA/protein mappings can be made between the alignments, offer to
-     * open a linked alignment with split frame option.
+     * Ensure datasets are created for the new alignment as
+     * mappings operate on dataset sequences
+     */
+    toAdd.setDataset(null);
+
+    /*
+     * Check if any added sequence could be the object of a mapping or
+     * cross-reference; if so, make the mapping explicit 
+     */
+    getAlignment().realiseMappings(toAdd.getSequences());
+
+    /*
+     * If any cDNA/protein mappings exist or can be made between the alignments, 
+     * offer to open a split frame with linked alignments
      */
     if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
     {
-      if (al.getDataset() == null)
-      {
-        // need to create ds seqs
-        for (SequenceI sq : al.getSequences())
-        {
-          if (sq.getDatasetSequence() == null)
-          {
-            sq.createDatasetSequence();
-          }
-        }
-      }
-      if (AlignmentUtils.isMappable(al, getAlignment()))
+      if (AlignmentUtils.isMappable(toAdd, getAlignment()))
       {
-        if (openLinkedAlignment(al, title))
+        if (openLinkedAlignment(toAdd, title))
         {
           return;
         }
@@ -872,9 +877,22 @@ public class AlignViewport extends AlignmentViewport implements
     // TODO: JAL-407 regardless of above - identical sequences (based on ID and
     // provenance) should share the same dataset sequence
 
-    for (int i = 0; i < al.getHeight(); i++)
+    AlignmentI al = getAlignment();
+    String gap = String.valueOf(al.getGapCharacter());
+    for (int i = 0; i < toAdd.getHeight(); i++)
     {
-      getAlignment().addSequence(al.getSequenceAt(i));
+      SequenceI seq = toAdd.getSequenceAt(i);
+      /*
+       * experimental, currently disabled (the test below is hard-coded to true)
+       * - 'align' any mapped sequences as per existing 
+       *    e.g. cdna to genome, domain hit to protein sequence
+       * very experimental! (need a separate menu option for this)
+       * - only add mapped sequences ('select targets from a dataset')
+       */
+      if (true /*AlignmentUtils.alignSequenceAs(seq, al, gap, true, true)*/)
+      {
+        al.addSequence(seq);
+      }
     }
 
     setEndSeq(getAlignment().getHeight());
index 643d8a0..136a38e 100644 (file)
@@ -21,6 +21,7 @@
 package jalview.gui;
 
 import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.SequenceI;
 import jalview.io.AnnotationFile;
 import jalview.io.FeaturesFile;
 import jalview.io.JalviewFileChooser;
@@ -32,6 +33,7 @@ import java.awt.Color;
 import java.awt.FlowLayout;
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
+import java.util.Map;
 
 import javax.swing.BorderFactory;
 import javax.swing.ButtonGroup;
@@ -153,17 +155,21 @@ public class AnnotationExporter extends JPanel
             .getString("label.no_features_on_alignment");
     if (features)
     {
+      FeaturesFile formatter = new FeaturesFile();
+      SequenceI[] sequences = ap.av.getAlignment().getDataset()
+              .getSequencesArray();
+      Map<String, Object> featureColours = ap.getFeatureRenderer()
+              .getDisplayedFeatureCols();
+      boolean includeNonPositional = ap.av.isShowNPFeats();
       if (GFFFormat.isSelected())
       {
-        text = new FeaturesFile().printGFFFormat(ap.av.getAlignment()
-                .getDataset().getSequencesArray(), ap.getFeatureRenderer()
-                .getDisplayedFeatureCols(), true, ap.av.isShowNPFeats());// ap.av.featuresDisplayed//);
+        text = formatter.printGffFormat(sequences, featureColours, true,
+                includeNonPositional);
       }
       else
       {
-        text = new FeaturesFile().printJalviewFormat(ap.av.getAlignment()
-                .getDataset().getSequencesArray(), ap.getFeatureRenderer()
-                .getDisplayedFeatureCols(), true, ap.av.isShowNPFeats()); // ap.av.featuresDisplayed);
+        text = formatter.printJalviewFormat(sequences, featureColours,
+                true, includeNonPositional);
       }
     }
     else
@@ -236,6 +242,7 @@ public class AnnotationExporter extends JPanel
     toFile.setText(MessageManager.getString("label.to_file"));
     toFile.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         toFile_actionPerformed(e);
@@ -244,6 +251,7 @@ public class AnnotationExporter extends JPanel
     toTextbox.setText(MessageManager.getString("label.to_textbox"));
     toTextbox.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         toTextbox_actionPerformed(e);
@@ -252,6 +260,7 @@ public class AnnotationExporter extends JPanel
     close.setText(MessageManager.getString("action.close"));
     close.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         close_actionPerformed(e);
index 4541fc2..c0e59a4 100644 (file)
@@ -24,6 +24,7 @@ import jalview.api.AlignViewportI;
 import jalview.api.AlignmentViewPanel;
 import jalview.api.ComplexAlignFile;
 import jalview.api.FeaturesDisplayedI;
+import jalview.api.FeaturesSourceI;
 import jalview.bin.Jalview;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.ColumnSelection;
@@ -72,6 +73,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
   {
     SwingUtilities.invokeLater(new Runnable()
     {
+      @Override
       public void run()
       {
         textarea.requestFocus();
@@ -124,6 +126,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
     textarea.append(text);
   }
 
+  @Override
   public void save_actionPerformed(ActionEvent e)
   {
     JalviewFileChooser chooser = new JalviewFileChooser(
@@ -160,6 +163,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
    * @param e
    *          DOCUMENT ME!
    */
+  @Override
   public void copyItem_actionPerformed(ActionEvent e)
   {
     textarea.getSelectedText();
@@ -173,6 +177,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
    * @param e
    *          DOCUMENT ME!
    */
+  @Override
   public void pasteMenu_actionPerformed(ActionEvent e)
   {
     Clipboard c = Toolkit.getDefaultToolkit().getSystemClipboard();
@@ -198,6 +203,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
    * @param e
    *          DOCUMENT ME!
    */
+  @Override
   public void ok_actionPerformed(ActionEvent e)
   {
     String text = getText();
@@ -206,7 +212,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
       return;
     }
 
-    String format = new IdentifyFile().Identify(text, "Paste");
+    String format = new IdentifyFile().identify(text, "Paste");
     if (format == null || format.equalsIgnoreCase("EMPTY DATA FILE"))
     {
       System.err.println(MessageManager
@@ -247,7 +253,11 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
     {
       String title = MessageManager.formatMessage(
               "label.input_cut_paste_params", new String[] { format });
-      if (viewport != null)
+      /*
+       * if the view panel was closed its alignment is nulled
+       * and this is an orphaned cut and paste window
+       */
+      if (viewport != null && viewport.getAlignment() != null)
       {
         ((AlignViewport) viewport).addAlignment(al, title);
       }
@@ -282,6 +292,10 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
         {
           af = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
                   AlignFrame.DEFAULT_HEIGHT);
+          if (source instanceof FeaturesSourceI)
+          {
+            af.getViewport().setShowSequenceFeatures(true);
+          }
         }
 
         af.currentFileFormat = format;
@@ -319,6 +333,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
    * @param e
    *          DOCUMENT ME!
    */
+  @Override
   public void cancel_actionPerformed(ActionEvent e)
   {
     try
@@ -329,6 +344,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
     }
   }
 
+  @Override
   public void textarea_mousePressed(MouseEvent e)
   {
     if (SwingUtilities.isRightMouseButton(e))
@@ -339,6 +355,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
               MessageManager.getString("action.copy"));
       item.addActionListener(new ActionListener()
       {
+        @Override
         public void actionPerformed(ActionEvent e)
         {
           copyItem_actionPerformed(e);
@@ -348,6 +365,7 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer
       item = new JMenuItem(MessageManager.getString("action.paste"));
       item.addActionListener(new ActionListener()
       {
+        @Override
         public void actionPerformed(ActionEvent e)
         {
           pasteMenu_actionPerformed(e);
index d3b8afc..a10ec4e 100644 (file)
@@ -631,7 +631,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements
         String file = (String) contents
                 .getTransferData(DataFlavor.stringFlavor);
 
-        String format = new IdentifyFile().Identify(file,
+        String format = new IdentifyFile().identify(file,
                 FormatAdapter.PASTE);
 
         new FileLoader().LoadFile(file, FormatAdapter.PASTE, format);
@@ -977,7 +977,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements
           }
           else
           {
-            format = new IdentifyFile().Identify(file, protocol);
+            format = new IdentifyFile().identify(file, protocol);
           }
 
           new FileLoader().LoadFile(file, protocol, format);
@@ -1028,7 +1028,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements
       }
       else
       {
-        format = new IdentifyFile().Identify(choice, FormatAdapter.FILE);
+        format = new IdentifyFile().identify(choice, FormatAdapter.FILE);
       }
 
       if (viewport != null)
@@ -1111,7 +1111,7 @@ public class Desktop extends jalview.jbgui.GDesktop implements
     }
     else
     {
-      String format = new IdentifyFile().Identify(url, FormatAdapter.URL);
+      String format = new IdentifyFile().identify(url, FormatAdapter.URL);
 
       if (format.equals("URL NOT FOUND"))
       {
index 625cccc..4f5e7ea 100644 (file)
@@ -93,25 +93,28 @@ public class FeatureColourChooser extends JalviewDialog
     this.fr = frender;
     this.type = type;
     ap = fr.ap;
-    initDialogFrame(this, true, block, "Graduated Feature Colour for "
-            + type, 480, 185);
+    String title = MessageManager.formatMessage(
+            "label.graduated_color_for_params", new String[] { type });
+    initDialogFrame(this, true, block, title, 480, 185);
     // frame.setLayer(JLayeredPane.PALETTE_LAYER);
     // Desktop.addInternalFrame(frame, "Graduated Feature Colour for "+type,
     // 480, 145);
 
     slider.addChangeListener(new ChangeListener()
     {
+      @Override
       public void stateChanged(ChangeEvent evt)
       {
         if (!adjusting)
         {
-          thresholdValue.setText(((float) slider.getValue() / 1000f) + "");
+          thresholdValue.setText((slider.getValue() / 1000f) + "");
           valueChanged();
         }
       }
     });
     slider.addMouseListener(new MouseAdapter()
     {
+      @Override
       public void mouseReleased(MouseEvent evt)
       {
         if (ap != null)
@@ -202,6 +205,7 @@ public class FeatureColourChooser extends JalviewDialog
     minColour.setToolTipText(MessageManager.getString("label.min_colour"));
     minColour.addMouseListener(new MouseAdapter()
     {
+      @Override
       public void mousePressed(MouseEvent e)
       {
         if (minColour.isEnabled())
@@ -216,6 +220,7 @@ public class FeatureColourChooser extends JalviewDialog
     maxColour.setToolTipText(MessageManager.getString("label.max_colour"));
     maxColour.addMouseListener(new MouseAdapter()
     {
+      @Override
       public void mousePressed(MouseEvent e)
       {
         if (maxColour.isEnabled())
@@ -235,6 +240,7 @@ public class FeatureColourChooser extends JalviewDialog
     jPanel2.setBackground(Color.white);
     threshold.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         threshold_actionPerformed(e);
@@ -251,6 +257,7 @@ public class FeatureColourChooser extends JalviewDialog
     jPanel3.setLayout(flowLayout2);
     thresholdValue.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent e)
       {
         thresholdValue_actionPerformed(e);
@@ -274,6 +281,7 @@ public class FeatureColourChooser extends JalviewDialog
             .getString("label.toggle_absolute_relative_display_threshold"));
     thresholdIsMin.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent actionEvent)
       {
         thresholdIsMin_actionPerformed(actionEvent);
@@ -287,6 +295,7 @@ public class FeatureColourChooser extends JalviewDialog
                     .getString("label.display_features_same_type_different_label_using_different_colour"));
     colourByLabel.addActionListener(new ActionListener()
     {
+      @Override
       public void actionPerformed(ActionEvent actionEvent)
       {
         colourByLabel_actionPerformed(actionEvent);
@@ -488,6 +497,7 @@ public class FeatureColourChooser extends JalviewDialog
     ap.paintAlignment(false);
   }
 
+  @Override
   protected void raiseClosed()
   {
     if (this.colourEditor != null)
@@ -496,11 +506,13 @@ public class FeatureColourChooser extends JalviewDialog
     }
   }
 
+  @Override
   public void okPressed()
   {
     changeColour();
   }
 
+  @Override
   public void cancelPressed()
   {
     reset();
@@ -542,7 +554,7 @@ public class FeatureColourChooser extends JalviewDialog
 
   public void valueChanged()
   {
-    threshline.value = (float) slider.getValue() / 1000f;
+    threshline.value = slider.getValue() / 1000f;
     cs.setThresh(threshline.value);
     changeColour();
     ap.paintAlignment(false);
index a886723..6f602ad 100644 (file)
@@ -46,6 +46,7 @@ import javax.swing.JLabel;
 import javax.swing.JPanel;
 import javax.swing.JScrollPane;
 import javax.swing.JTree;
+import javax.swing.ToolTipManager;
 import javax.swing.event.TreeSelectionEvent;
 import javax.swing.event.TreeSelectionListener;
 import javax.swing.tree.DefaultMutableTreeNode;
@@ -244,6 +245,7 @@ public class JDatabaseTree extends JalviewDialog implements KeyListener
     public DbTreeRenderer(JDatabaseTree me)
     {
       us = me;
+      ToolTipManager.sharedInstance().registerComponent(dbviews);
     }
 
     private Component returnLabel(String txt)
@@ -265,19 +267,23 @@ public class JDatabaseTree extends JalviewDialog implements KeyListener
         value = vl.getUserObject();
         if (value instanceof DbSourceProxy)
         {
-          val = (((DbSourceProxy) value).getDbName());
+          val = ((DbSourceProxy) value).getDbName();
+          if (((DbSourceProxy) value).getDescription() != null)
+          { // getName()
+            this.setToolTipText(((DbSourceProxy) value).getDescription());
+          }
         }
         else
         {
           if (value instanceof String)
           {
-            val = ((String) value);
+            val = (String) value;
           }
         }
       }
       if (value == null)
       {
-        val = ("");
+        val = "";
       }
       return super.getTreeCellRendererComponent(tree, val, selected,
               expanded, leaf, row, hasFocus);
index fc96edc..3f3e8f6 100644 (file)
@@ -761,10 +761,11 @@ public class Jalview2XML
           if (sf[index].otherDetails != null)
           {
             String key;
-            Enumeration keys = sf[index].otherDetails.keys();
-            while (keys.hasMoreElements())
+            Iterator<String> keys = sf[index].otherDetails.keySet()
+                    .iterator();
+            while (keys.hasNext())
             {
-              key = keys.nextElement().toString();
+              key = keys.next();
               OtherData keyValue = new OtherData();
               keyValue.setKey(key);
               keyValue.setValue(sf[index].otherDetails.get(key).toString());
@@ -879,7 +880,7 @@ public class Jalview2XML
     // SAVE MAPPINGS
     if (jal.getCodonFrames() != null)
     {
-      Set<AlignedCodonFrame> jac = jal.getCodonFrames();
+      List<AlignedCodonFrame> jac = jal.getCodonFrames();
       for (AlignedCodonFrame acf : jac)
       {
         AlcodonFrame alc = new AlcodonFrame();
@@ -2178,6 +2179,7 @@ public class Jalview2XML
       {
         SwingUtilities.invokeAndWait(new Runnable()
         {
+          @Override
           public void run()
           {
             setLoadingFinishedForNewStructureViewers();
index 191e8fe..2004761 100755 (executable)
@@ -22,7 +22,6 @@ package jalview.gui;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.FormatAdapter;
@@ -54,8 +53,6 @@ import javax.swing.JTextArea;
 import javax.swing.SwingConstants;
 import javax.swing.tree.DefaultMutableTreeNode;
 
-import com.stevesoft.pat.Regex;
-
 public class SequenceFetcher extends JPanel implements Runnable
 {
   JLabel dbeg = new JLabel();
@@ -518,6 +515,7 @@ public class SequenceFetcher extends JPanel implements Runnable
       resetDialog();
       return;
     }
+    // TODO: Refactor to GUI independent code and write tests.
     // indicate if successive sources should be merged into one alignment.
     boolean addToLast = false;
     ArrayList<String> aresultq = new ArrayList<String>(), presultTitle = new ArrayList<String>();
@@ -552,7 +550,7 @@ public class SequenceFetcher extends JPanel implements Runnable
                             Integer.valueOf(nqueries).toString(),
                             proxy.getDbName() }), Thread.currentThread()
                         .hashCode());
-        isAliSource = proxy.isA(DBRefSource.ALIGNMENTDB);
+        isAliSource = proxy.isAlignmentSource();
         if (proxy.getAccessionSeparator() == null)
         {
           while (en.hasNext())
@@ -639,23 +637,8 @@ public class SequenceFetcher extends JPanel implements Runnable
                 DBRefEntry dbr = new DBRefEntry(), found[] = null;
                 dbr.setSource(proxy.getDbSource());
                 dbr.setVersion(null);
-                if (proxy.getAccessionValidator() != null)
-                {
-                  Regex vgr = proxy.getAccessionValidator();
-                  vgr.search(q);
-                  if (vgr.numSubs() > 0)
-                  {
-                    dbr.setAccessionId(vgr.stringMatched(1));
-                  }
-                  else
-                  {
-                    dbr.setAccessionId(vgr.stringMatched());
-                  }
-                }
-                else
-                {
-                  dbr.setAccessionId(q);
-                }
+                String accId = proxy.getAccessionIdFromQuery(q);
+                dbr.setAccessionId(accId);
                 boolean rfound = false;
                 for (int r = 0; r < rs.length; r++)
                 {
@@ -790,7 +773,7 @@ public class SequenceFetcher extends JPanel implements Runnable
 
   AlignmentI parseResult(String result, String title)
   {
-    String format = new IdentifyFile().Identify(result, "Paste");
+    String format = new IdentifyFile().identify(result, "Paste");
     AlignmentI sequences = null;
     if (FormatAdapter.isValidFormat(format))
     {
index 3a6d266..083c7ec 100644 (file)
@@ -38,15 +38,12 @@ import java.awt.event.KeyEvent;
 import java.awt.event.KeyListener;
 import java.beans.PropertyVetoException;
 import java.util.Map.Entry;
-import java.util.Set;
 
 import javax.swing.AbstractAction;
 import javax.swing.InputMap;
 import javax.swing.JComponent;
 import javax.swing.JMenuItem;
 import javax.swing.KeyStroke;
-import javax.swing.UIDefaults;
-import javax.swing.UIManager;
 import javax.swing.event.InternalFrameAdapter;
 import javax.swing.event.InternalFrameEvent;
 
@@ -89,13 +86,6 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI
      * estimate width and height of SplitFrame; this.getInsets() doesn't seem to
      * give the full additional size (a few pixels short)
      */
-    UIDefaults defaults = UIManager.getDefaults();
-    Set<Object> keySet = defaults.keySet();
-    for (Object key : keySet)
-    {
-      System.out.println(key.toString() + " = "
-              + UIManager.get(key).toString());
-    }
     int widthFudge = Platform.isAMac() ? 28 : 28; // Windows tbc
     int heightFudge = Platform.isAMac() ? 50 : 50; // tbc
     int width = ((AlignFrame) getTopFrame()).getWidth() + widthFudge;
index 7e0cabd..2b8f127 100755 (executable)
@@ -47,7 +47,8 @@ public abstract class AlignFile extends FileParse
   int maxLength = 0;
 
   /**
-   * Sequences to be added to form a new alignment.
+   * Sequences to be added to form a new alignment. TODO: remove vector in this
+   * class
    */
   protected Vector<SequenceI> seqs;
 
@@ -351,7 +352,27 @@ public abstract class AlignFile extends FileParse
     if (space > -1)
     {
       seq = new Sequence(id.substring(0, space), "");
-      seq.setDescription(id.substring(space + 1));
+      String desc = id.substring(space + 1);
+      seq.setDescription(desc);
+
+      if (desc.startsWith("chromosome"))
+      {
+        /*
+         * parse Ensembl style gene description e.g.
+         * chromosome:GRCh38:7:140696688:140721955:1
+         */
+        String[] tokens = desc.split(":");
+        if (tokens.length > 3)
+        {
+          try
+          {
+            seq.setStart(Integer.parseInt(tokens[3]));
+          } catch (NumberFormatException e)
+          {
+            // start field is not an integer; leave sequence start unchanged
+          }
+        }
+      }
     }
     else
     {
index 239c531..e34093d 100755 (executable)
@@ -86,7 +86,7 @@ public class AppletFormatAdapter
   public static final String[] READABLE_FORMATS = new String[] { "BLC",
       "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", "PDB",
       "JnetFile", "RNAML", PhylipFile.FILE_DESC, JSONFile.FILE_DESC,
-      IdentifyFile.GFF3File, "HTML" };
+      IdentifyFile.FeaturesFile, "HTML" };
 
   /**
    * List of readable format file extensions by application in order
@@ -103,7 +103,7 @@ public class AppletFormatAdapter
    */
   public static final String[] READABLE_FNAMES = new String[] { "Fasta",
       "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Stockholm", "RNAML",
-      PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.GFF3File,
+      PhylipFile.FILE_DESC, JSONFile.FILE_DESC, IdentifyFile.FeaturesFile,
       "Jalview", HtmlFile.FILE_DESC };
 
   /**
@@ -306,9 +306,9 @@ public class AppletFormatAdapter
       {
         alignFile = new RnamlFile(inFile, type);
       }
-      else if (format.equals(IdentifyFile.GFF3File))
+      else if (format.equals(IdentifyFile.FeaturesFile))
       {
-        alignFile = new Gff3File(inFile, type);
+        alignFile = new FeaturesFile(true, inFile, type);
       }
       return buildAlignmentFrom(alignFile);
     } catch (Exception e)
@@ -426,9 +426,10 @@ public class AppletFormatAdapter
       {
         alignFile = new PhylipFile(source);
       }
-      else if (format.equals(IdentifyFile.GFF3File))
+      else if (format.equals(IdentifyFile.FeaturesFile))
       {
-        alignFile = new Gff3File(inFile, type);
+        // enforce 'parseImmediately' here:
+        alignFile = new FeaturesFile(true, inFile, type);
       }
       else if (format.equals(JSONFile.FILE_DESC))
       {
@@ -669,7 +670,7 @@ public class AppletFormatAdapter
           long memf = -r.totalMemory() + r.freeMemory();
           long t1 = -System.currentTimeMillis();
           AlignmentI al = afa.readFile(args[i], FILE,
-                  new IdentifyFile().Identify(args[i], FILE));
+                  new IdentifyFile().identify(args[i], FILE));
           t1 += System.currentTimeMillis();
           System.gc();
           memf += r.totalMemory() - r.freeMemory();
@@ -835,7 +836,7 @@ public class AppletFormatAdapter
     {
       try
       {
-        String idformat = new jalview.io.IdentifyFile().Identify(file,
+        String idformat = new jalview.io.IdentifyFile().identify(file,
                 protocol);
         if (idformat == null)
         {
index aa5583c..2dd5f26 100755 (executable)
  */
 package jalview.io;
 
+import jalview.analysis.AlignmentUtils;
 import jalview.analysis.SequenceIdMatcher;
+import jalview.api.AlignViewportI;
+import jalview.api.FeaturesSourceI;
 import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.io.gff.GffHelperBase;
+import jalview.io.gff.GffHelperFactory;
+import jalview.io.gff.GffHelperI;
 import jalview.schemes.AnnotationColourGradient;
 import jalview.schemes.GraduatedColor;
 import jalview.schemes.UserColourScheme;
 import jalview.util.Format;
 import jalview.util.MapList;
+import jalview.util.ParseHtmlBodyAndLinks;
+import jalview.util.StringUtils;
 
+import java.awt.Color;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.StringTokenizer;
-import java.util.Vector;
 
 /**
- * Parse and create Jalview Features files Detects GFF format features files and
- * parses. Does not implement standard print() - call specific printFeatures or
- * printGFF. Uses AlignmentI.findSequence(String id) to find the sequence object
- * for the features annotation - this normally works on an exact match.
+ * Parses and writes features files, which may be in Jalview, GFF2 or GFF3
+ * format. These are tab-delimited formats but with differences in the use of
+ * columns.
+ * 
+ * A Jalview feature file may define feature colours and then declare that the
+ * remainder of the file is in GFF format with the line 'GFF'.
+ * 
+ * GFF3 files may include alignment mappings for features, which Jalview will
+ * attempt to model, and may include sequence data following a ##FASTA line.
+ * 
  * 
  * @author AMW
- * @version $Revision$
+ * @author jbprocter
+ * @author gmcarstairs
  */
-public class FeaturesFile extends AlignFile
+public class FeaturesFile extends AlignFile implements FeaturesSourceI
 {
-  /**
-   * work around for GFF interpretation bug where source string becomes
-   * description rather than a group
-   */
-  private boolean doGffSource = true;
+  private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
+
+  private static final String NOTE = "Note";
 
-  private int gffversion;
+  protected static final String TAB = "\t";
+
+  protected static final String GFF_VERSION = "##gff-version";
+
+  private AlignmentI lastmatchedAl = null;
+
+  private SequenceIdMatcher matcher = null;
+
+  protected AlignmentI dataset;
+
+  protected int gffVersion;
 
   /**
    * Creates a new FeaturesFile object.
@@ -70,13 +94,15 @@ public class FeaturesFile extends AlignFile
   }
 
   /**
+   * Constructor which does not parse the file immediately
+   * 
    * @param inFile
    * @param type
    * @throws IOException
    */
   public FeaturesFile(String inFile, String type) throws IOException
   {
-    super(inFile, type);
+    super(false, inFile, type);
   }
 
   /**
@@ -89,17 +115,8 @@ public class FeaturesFile extends AlignFile
   }
 
   /**
-   * @param parseImmediately
-   * @param source
-   * @throws IOException
-   */
-  public FeaturesFile(boolean parseImmediately, FileParse source)
-          throws IOException
-  {
-    super(parseImmediately, source);
-  }
-
-  /**
+   * Constructor that optionally parses the file immediately
+   * 
    * @param parseImmediately
    * @param inFile
    * @param type
@@ -123,565 +140,121 @@ public class FeaturesFile extends AlignFile
    *          - process html strings into plain text
    * @return true if features were added
    */
-  public boolean parse(AlignmentI align, Map colours, boolean removeHTML)
-  {
-    return parse(align, colours, null, removeHTML, false);
-  }
-
-  /**
-   * Parse GFF or sequence features file optionally using case-independent
-   * matching, discarding URLs
-   * 
-   * @param align
-   *          - alignment/dataset containing sequences that are to be annotated
-   * @param colours
-   *          - hashtable to store feature colour definitions
-   * @param removeHTML
-   *          - process html strings into plain text
-   * @param relaxedIdmatching
-   *          - when true, ID matches to compound sequence IDs are allowed
-   * @return true if features were added
-   */
-  public boolean parse(AlignmentI align, Map colours, boolean removeHTML,
-          boolean relaxedIdMatching)
+  public boolean parse(AlignmentI align, Map<String, Object> colours,
+          boolean removeHTML)
   {
-    return parse(align, colours, null, removeHTML, relaxedIdMatching);
+    return parse(align, colours, removeHTML, false);
   }
 
   /**
-   * Parse GFF or sequence features file optionally using case-independent
-   * matching
-   * 
-   * @param align
-   *          - alignment/dataset containing sequences that are to be annotated
-   * @param colours
-   *          - hashtable to store feature colour definitions
-   * @param featureLink
-   *          - hashtable to store associated URLs
-   * @param removeHTML
-   *          - process html strings into plain text
-   * @return true if features were added
+   * Extends the default addProperties by copying any peptide-to-cDNA mappings
+   * derived while parsing GFF to al's dataset, or to al itself if it has none
    */
-  public boolean parse(AlignmentI align, Map colours, Map featureLink,
-          boolean removeHTML)
-  {
-    return parse(align, colours, featureLink, removeHTML, false);
-  }
-
-  @Override
-  public void addAnnotations(AlignmentI al)
-  {
-    // TODO Auto-generated method stub
-    super.addAnnotations(al);
-  }
-
   @Override
   public void addProperties(AlignmentI al)
   {
-    // TODO Auto-generated method stub
     super.addProperties(al);
-  }
-
-  @Override
-  public void addSeqGroups(AlignmentI al)
-  {
-    // TODO Auto-generated method stub
-    super.addSeqGroups(al);
+    if (dataset != null && dataset.getCodonFrames() != null)
+    {
+      AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset();
+      for (AlignedCodonFrame codons : dataset.getCodonFrames())
+      {
+        ds.addCodonFrame(codons);
+      }
+    }
   }
 
   /**
-   * Parse GFF or sequence features file
+   * Parse GFF or Jalview format sequence features file
    * 
    * @param align
    *          - alignment/dataset containing sequences that are to be annotated
    * @param colours
    *          - hashtable to store feature colour definitions
-   * @param featureLink
-   *          - hashtable to store associated URLs
    * @param removeHTML
    *          - process html strings into plain text
    * @param relaxedIdmatching
    *          - when true, ID matches to compound sequence IDs are allowed
    * @return true if features were added
    */
-  public boolean parse(AlignmentI align, Map colours, Map featureLink,
+  public boolean parse(AlignmentI align, Map<String, Object> colours,
           boolean removeHTML, boolean relaxedIdmatching)
   {
+    Map<String, String> gffProps = new HashMap<String, String>();
+    /*
+     * keep track of any sequences we try to create from the data
+     */
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
 
     String line = null;
     try
     {
-      SequenceI seq = null;
-      /**
-       * keep track of any sequences we try to create from the data if it is a
-       * GFF3 file
-       */
-      ArrayList<SequenceI> newseqs = new ArrayList<SequenceI>();
-      String type, desc, token = null;
-
-      int index, start, end;
-      float score;
-      StringTokenizer st;
-      SequenceFeature sf;
-      String featureGroup = null, groupLink = null;
-      Map typeLink = new Hashtable();
-      /**
-       * when true, assume GFF style features rather than Jalview style.
-       */
-      boolean GFFFile = true;
-      Map<String, String> gffProps = new HashMap<String, String>();
+      String[] gffColumns;
+      String featureGroup = null;
+
       while ((line = nextLine()) != null)
       {
         // skip comments/process pragmas
-        if (line.startsWith("#"))
+        if (line.length() == 0 || line.startsWith("#"))
         {
-          if (line.startsWith("##"))
+          if (line.toLowerCase().startsWith("##"))
           {
-            // possibly GFF2/3 version and metadata header
             processGffPragma(line, gffProps, align, newseqs);
-            line = "";
           }
           continue;
         }
 
-        st = new StringTokenizer(line, "\t");
-        if (st.countTokens() == 1)
+        gffColumns = line.split("\\t"); // tab as regex
+        if (gffColumns.length == 1)
         {
           if (line.trim().equalsIgnoreCase("GFF"))
           {
-            // Start parsing file as if it might be GFF again.
-            GFFFile = true;
+            /*
+             * Jalview features file with appended GFF
+             * assume GFF2 (though it may declare ##gff-version 3)
+             */
+            gffVersion = 2;
             continue;
           }
         }
-        if (st.countTokens() > 1 && st.countTokens() < 4)
+
+        if (gffColumns.length > 1 && gffColumns.length < 4)
         {
-          GFFFile = false;
-          type = st.nextToken();
-          if (type.equalsIgnoreCase("startgroup"))
+          /*
+           * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
+           * a feature type colour specification
+           */
+          String ft = gffColumns[0];
+          if (ft.equalsIgnoreCase("startgroup"))
           {
-            featureGroup = st.nextToken();
-            if (st.hasMoreElements())
-            {
-              groupLink = st.nextToken();
-              featureLink.put(featureGroup, groupLink);
-            }
+            featureGroup = gffColumns[1];
           }
-          else if (type.equalsIgnoreCase("endgroup"))
+          else if (ft.equalsIgnoreCase("endgroup"))
           {
             // We should check whether this is the current group,
             // but at present theres no way of showing more than 1 group
-            st.nextToken();
             featureGroup = null;
-            groupLink = null;
           }
           else
           {
-            Object colour = null;
-            String colscheme = st.nextToken();
-            if (colscheme.indexOf("|") > -1
-                    || colscheme.trim().equalsIgnoreCase("label"))
-            {
-              // Parse '|' separated graduated colourscheme fields:
-              // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue]
-              // can either provide 'label' only, first is optional, next two
-              // colors are required (but may be
-              // left blank), next is optional, nxt two min/max are required.
-              // first is either 'label'
-              // first/second and third are both hexadecimal or word equivalent
-              // colour.
-              // next two are values parsed as floats.
-              // fifth is either 'above','below', or 'none'.
-              // sixth is a float value and only required when fifth is either
-              // 'above' or 'below'.
-              StringTokenizer gcol = new StringTokenizer(colscheme, "|",
-                      true);
-              // set defaults
-              int threshtype = AnnotationColourGradient.NO_THRESHOLD;
-              float min = Float.MIN_VALUE, max = Float.MAX_VALUE, threshval = Float.NaN;
-              boolean labelCol = false;
-              // Parse spec line
-              String mincol = gcol.nextToken();
-              if (mincol == "|")
-              {
-                System.err
-                        .println("Expected either 'label' or a colour specification in the line: "
-                                + line);
-                continue;
-              }
-              String maxcol = null;
-              if (mincol.toLowerCase().indexOf("label") == 0)
-              {
-                labelCol = true;
-                mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip
-                                                                           // '|'
-                mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);
-              }
-              String abso = null, minval, maxval;
-              if (mincol != null)
-              {
-                // at least four more tokens
-                if (mincol.equals("|"))
-                {
-                  mincol = "";
-                }
-                else
-                {
-                  gcol.nextToken(); // skip next '|'
-                }
-                // continue parsing rest of line
-                maxcol = gcol.nextToken();
-                if (maxcol.equals("|"))
-                {
-                  maxcol = "";
-                }
-                else
-                {
-                  gcol.nextToken(); // skip next '|'
-                }
-                abso = gcol.nextToken();
-                gcol.nextToken(); // skip next '|'
-                if (abso.toLowerCase().indexOf("abso") != 0)
-                {
-                  minval = abso;
-                  abso = null;
-                }
-                else
-                {
-                  minval = gcol.nextToken();
-                  gcol.nextToken(); // skip next '|'
-                }
-                maxval = gcol.nextToken();
-                if (gcol.hasMoreTokens())
-                {
-                  gcol.nextToken(); // skip next '|'
-                }
-                try
-                {
-                  if (minval.length() > 0)
-                  {
-                    min = new Float(minval).floatValue();
-                  }
-                } catch (Exception e)
-                {
-                  System.err
-                          .println("Couldn't parse the minimum value for graduated colour for type ("
-                                  + colscheme
-                                  + ") - did you misspell 'auto' for the optional automatic colour switch ?");
-                  e.printStackTrace();
-                }
-                try
-                {
-                  if (maxval.length() > 0)
-                  {
-                    max = new Float(maxval).floatValue();
-                  }
-                } catch (Exception e)
-                {
-                  System.err
-                          .println("Couldn't parse the maximum value for graduated colour for type ("
-                                  + colscheme + ")");
-                  e.printStackTrace();
-                }
-              }
-              else
-              {
-                // add in some dummy min/max colours for the label-only
-                // colourscheme.
-                mincol = "FFFFFF";
-                maxcol = "000000";
-              }
-              try
-              {
-                colour = new jalview.schemes.GraduatedColor(
-                        new UserColourScheme(mincol).findColour('A'),
-                        new UserColourScheme(maxcol).findColour('A'), min,
-                        max);
-              } catch (Exception e)
-              {
-                System.err
-                        .println("Couldn't parse the graduated colour scheme ("
-                                + colscheme + ")");
-                e.printStackTrace();
-              }
-              if (colour != null)
-              {
-                ((jalview.schemes.GraduatedColor) colour)
-                        .setColourByLabel(labelCol);
-                ((jalview.schemes.GraduatedColor) colour)
-                        .setAutoScaled(abso == null);
-                // add in any additional parameters
-                String ttype = null, tval = null;
-                if (gcol.hasMoreTokens())
-                {
-                  // threshold type and possibly a threshold value
-                  ttype = gcol.nextToken();
-                  if (ttype.toLowerCase().startsWith("below"))
-                  {
-                    ((jalview.schemes.GraduatedColor) colour)
-                            .setThreshType(AnnotationColourGradient.BELOW_THRESHOLD);
-                  }
-                  else if (ttype.toLowerCase().startsWith("above"))
-                  {
-                    ((jalview.schemes.GraduatedColor) colour)
-                            .setThreshType(AnnotationColourGradient.ABOVE_THRESHOLD);
-                  }
-                  else
-                  {
-                    ((jalview.schemes.GraduatedColor) colour)
-                            .setThreshType(AnnotationColourGradient.NO_THRESHOLD);
-                    if (!ttype.toLowerCase().startsWith("no"))
-                    {
-                      System.err
-                              .println("Ignoring unrecognised threshold type : "
-                                      + ttype);
-                    }
-                  }
-                }
-                if (((GraduatedColor) colour).getThreshType() != AnnotationColourGradient.NO_THRESHOLD)
-                {
-                  try
-                  {
-                    gcol.nextToken();
-                    tval = gcol.nextToken();
-                    ((jalview.schemes.GraduatedColor) colour)
-                            .setThresh(new Float(tval).floatValue());
-                  } catch (Exception e)
-                  {
-                    System.err
-                            .println("Couldn't parse threshold value as a float: ("
-                                    + tval + ")");
-                    e.printStackTrace();
-                  }
-                }
-                // parse the thresh-is-min token ?
-                if (gcol.hasMoreTokens())
-                {
-                  System.err
-                          .println("Ignoring additional tokens in parameters in graduated colour specification\n");
-                  while (gcol.hasMoreTokens())
-                  {
-                    System.err.println("|" + gcol.nextToken());
-                  }
-                  System.err.println("\n");
-                }
-              }
-            }
-            else
-            {
-              UserColourScheme ucs = new UserColourScheme(colscheme);
-              colour = ucs.findColour('A');
-            }
-            if (colour != null)
-            {
-              colours.put(type, colour);
-            }
-            if (st.hasMoreElements())
-            {
-              String link = st.nextToken();
-              typeLink.put(type, link);
-              if (featureLink == null)
-              {
-                featureLink = new Hashtable();
-              }
-              featureLink.put(type, link);
-            }
+            parseFeatureColour(line, ft, gffColumns, colours);
           }
           continue;
         }
-        String seqId = "";
-        while (st.hasMoreElements())
-        {
-
-          if (GFFFile)
-          {
-            // Still possible this is an old Jalview file,
-            // which does not have type colours at the beginning
-            seqId = token = st.nextToken();
-            seq = findName(align, seqId, relaxedIdmatching, newseqs);
-            if (seq != null)
-            {
-              desc = st.nextToken();
-              String group = null;
-              if (doGffSource && desc.indexOf(' ') == -1)
-              {
-                // could also be a source term rather than description line
-                group = new String(desc);
-              }
-              type = st.nextToken();
-              try
-              {
-                String stt = st.nextToken();
-                if (stt.length() == 0 || stt.equals("-"))
-                {
-                  start = 0;
-                }
-                else
-                {
-                  start = Integer.parseInt(stt);
-                }
-              } catch (NumberFormatException ex)
-              {
-                start = 0;
-              }
-              try
-              {
-                String stt = st.nextToken();
-                if (stt.length() == 0 || stt.equals("-"))
-                {
-                  end = 0;
-                }
-                else
-                {
-                  end = Integer.parseInt(stt);
-                }
-              } catch (NumberFormatException ex)
-              {
-                end = 0;
-              }
-              // TODO: decide if non positional feature assertion for input data
-              // where end==0 is generally valid
-              if (end == 0)
-              {
-                // treat as non-positional feature, regardless.
-                start = 0;
-              }
-              try
-              {
-                score = new Float(st.nextToken()).floatValue();
-              } catch (NumberFormatException ex)
-              {
-                score = 0;
-              }
-
-              sf = new SequenceFeature(type, desc, start, end, score, group);
-
-              try
-              {
-                sf.setValue("STRAND", st.nextToken());
-                sf.setValue("FRAME", st.nextToken());
-              } catch (Exception ex)
-              {
-              }
-
-              if (st.hasMoreTokens())
-              {
-                StringBuffer attributes = new StringBuffer();
-                boolean sep = false;
-                while (st.hasMoreTokens())
-                {
-                  attributes.append((sep ? "\t" : "") + st.nextElement());
-                  sep = true;
-                }
-                // TODO validate and split GFF2 attributes field ? parse out
-                // ([A-Za-z][A-Za-z0-9_]*) <value> ; and add as
-                // sf.setValue(attrib, val);
-                sf.setValue("ATTRIBUTES", attributes.toString());
-              }
-
-              if (processOrAddSeqFeature(align, newseqs, seq, sf, GFFFile,
-                      relaxedIdmatching))
-              {
-                // check whether we should add the sequence feature to any other
-                // sequences in the alignment with the same or similar
-                while ((seq = align.findName(seq, seqId, true)) != null)
-                {
-                  seq.addSequenceFeature(new SequenceFeature(sf));
-                }
-              }
-              break;
-            }
-          }
-
-          if (GFFFile && seq == null)
-          {
-            desc = token;
-          }
-          else
-          {
-            desc = st.nextToken();
-          }
-          if (!st.hasMoreTokens())
-          {
-            System.err
-                    .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up.");
-            // in all probability, this isn't a file we understand, so bail
-            // quietly.
-            return false;
-          }
-
-          token = st.nextToken();
-
-          if (!token.equals("ID_NOT_SPECIFIED"))
-          {
-            seq = findName(align, seqId = token, relaxedIdmatching, null);
-            st.nextToken();
-          }
-          else
-          {
-            seqId = null;
-            try
-            {
-              index = Integer.parseInt(st.nextToken());
-              seq = align.getSequenceAt(index);
-            } catch (NumberFormatException ex)
-            {
-              seq = null;
-            }
-          }
-
-          if (seq == null)
-          {
-            System.out.println("Sequence not found: " + line);
-            break;
-          }
-
-          start = Integer.parseInt(st.nextToken());
-          end = Integer.parseInt(st.nextToken());
-
-          type = st.nextToken();
-
-          if (!colours.containsKey(type))
-          {
-            // Probably the old style groups file
-            UserColourScheme ucs = new UserColourScheme(type);
-            colours.put(type, ucs.findColour('A'));
-          }
-          sf = new SequenceFeature(type, desc, "", start, end, featureGroup);
-          if (st.hasMoreTokens())
-          {
-            try
-            {
-              score = new Float(st.nextToken()).floatValue();
-              // update colourgradient bounds if allowed to
-            } catch (NumberFormatException ex)
-            {
-              score = 0;
-            }
-            sf.setScore(score);
-          }
-          if (groupLink != null && removeHTML)
-          {
-            sf.addLink(groupLink);
-            sf.description += "%LINK%";
-          }
-          if (typeLink.containsKey(type) && removeHTML)
-          {
-            sf.addLink(typeLink.get(type).toString());
-            sf.description += "%LINK%";
-          }
-
-          parseDescriptionHTML(sf, removeHTML);
-
-          seq.addSequenceFeature(sf);
 
-          while (seqId != null
-                  && (seq = align.findName(seq, seqId, false)) != null)
-          {
-            seq.addSequenceFeature(new SequenceFeature(sf));
-          }
-          // If we got here, its not a GFFFile
-          GFFFile = false;
+        /*
+         * if not a comment, GFF pragma, startgroup, endgroup or feature
+         * colour specification, that just leaves a feature details line
+         * in either Jalview or GFF format
+         */
+        if (gffVersion == 0)
+        {
+          parseJalviewFeature(line, gffColumns, align, colours, removeHTML,
+                  relaxedIdmatching, featureGroup);
+        }
+        else
+        {
+          parseGff(gffColumns, align, relaxedIdmatching, newseqs);
         }
       }
       resetMatcher();
@@ -696,428 +269,377 @@ public class FeaturesFile extends AlignFile
       return false;
     }
 
+    /*
+     * experimental - add any dummy sequences with features to the alignment
+     * - we need them for Ensembl feature extraction - though maybe not otherwise
+     */
+    for (SequenceI newseq : newseqs)
+    {
+      if (newseq.getSequenceFeatures() != null)
+      {
+        align.addSequence(newseq);
+      }
+    }
     return true;
   }
 
-  private enum GffPragmas
-  {
-    gff_version, sequence_region, feature_ontology, attribute_ontology, source_ontology, species_build, fasta, hash
-  };
-
-  private static Map<String, GffPragmas> GFFPRAGMA;
-  static
+  /**
+   * Try to parse a Jalview format feature specification and add it as a
+   * sequence feature to any matching sequences in the alignment. Returns true
+   * if successful (a feature was added), or false if not.
+   * 
+   * @param line the input line (used in error messages only)
+   * @param gffColumns the tab-separated tokens of the line
+   * @param alignment the alignment in which to look up the target sequence
+   * @param featureColours feature colours by type; a colour is added if absent
+   * @param removeHTML passed to parseDescriptionHTML for the new feature
+   * @param relaxedIdMatching passed to findSequence when matching by id
+   * @param featureGroup group assigned to the feature (may be null)
+   */
+  protected boolean parseJalviewFeature(String line, String[] gffColumns,
+          AlignmentI alignment, Map<String, Object> featureColours,
+          boolean removeHTML, boolean relaxedIdMatching, String featureGroup)
   {
-    GFFPRAGMA = new HashMap<String, GffPragmas>();
-    GFFPRAGMA.put("sequence-region", GffPragmas.sequence_region);
-    GFFPRAGMA.put("feature-ontology", GffPragmas.feature_ontology);
-    GFFPRAGMA.put("#", GffPragmas.hash);
-    GFFPRAGMA.put("fasta", GffPragmas.fasta);
-    GFFPRAGMA.put("species-build", GffPragmas.species_build);
-    GFFPRAGMA.put("source-ontology", GffPragmas.source_ontology);
-    GFFPRAGMA.put("attribute-ontology", GffPragmas.attribute_ontology);
-  }
+    /*
+     * tokens: description seqid seqIndex start end type [score]
+     */
+    if (gffColumns.length < 6)
+    {
+      System.err.println("Ignoring feature line '" + line
+              + "' with too few columns (" + gffColumns.length + ")");
+      return false;
+    }
+    String desc = gffColumns[0];
+    String seqId = gffColumns[1];
+    SequenceI seq; // resolved below, either by id or by alignment index
 
-  private void processGffPragma(String line, Map<String, String> gffProps,
-          AlignmentI align, ArrayList<SequenceI> newseqs)
-          throws IOException
-  {
-    // line starts with ##
-    int spacepos = line.indexOf(' ');
-    String pragma = spacepos == -1 ? line.substring(2).trim() : line
-            .substring(2, spacepos);
-    GffPragmas gffpragma = GFFPRAGMA.get(pragma.toLowerCase());
-    if (gffpragma == null)
+    if (!ID_NOT_SPECIFIED.equals(seqId))
     {
-      return;
+      seq = findSequence(seqId, alignment, null, relaxedIdMatching);
     }
-    switch (gffpragma)
+    else
     {
-    case gff_version:
+      seqId = null;
+      seq = null;
+      String seqIndex = gffColumns[2];
       try
       {
-        gffversion = Integer.parseInt(line.substring(spacepos + 1));
-      } finally
+        int idx = Integer.parseInt(seqIndex);
+        seq = alignment.getSequenceAt(idx);
+      } catch (NumberFormatException ex)
       {
-
+        System.err.println("Invalid sequence index: " + seqIndex);
       }
-      break;
-    case feature_ontology:
-      // resolve against specific feature ontology
-      break;
-    case attribute_ontology:
-      // resolve against specific attribute ontology
-      break;
-    case source_ontology:
-      // resolve against specific source ontology
-      break;
-    case species_build:
-      // resolve against specific NCBI taxon version
-      break;
-    case hash:
-      // close off any open feature hierarchies
-      break;
-    case fasta:
-      // process the rest of the file as a fasta file and replace any dummy
-      // sequence IDs
-      process_as_fasta(align, newseqs);
-      break;
-    default:
-      // we do nothing ?
-      System.err.println("Ignoring unknown pragma:\n" + line);
     }
-  }
 
-  private void process_as_fasta(AlignmentI align, List<SequenceI> newseqs)
-          throws IOException
-  {
-    try
+    if (seq == null)
     {
-      mark();
-    } catch (IOException q)
+      System.out.println("Sequence not found: " + line);
+      return false;
+    }
+
+    int startPos = Integer.parseInt(gffColumns[3]);
+    int endPos = Integer.parseInt(gffColumns[4]);
+
+    String ft = gffColumns[5];
+
+    if (!featureColours.containsKey(ft))
     {
+      /* 
+       * Perhaps an old style groups file with no colours -
+       * synthesize a colour from the feature type
+       */
+      UserColourScheme ucs = new UserColourScheme(ft);
+      featureColours.put(ft, ucs.findColour('A'));
     }
-    FastaFile parser = new FastaFile(this);
-    List<SequenceI> includedseqs = parser.getSeqs();
-    SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
-    // iterate over includedseqs, and replacing matching ones with newseqs
-    // sequences. Generic iterator not used here because we modify includedseqs
-    // as we go
-    for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+    SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos,
+            endPos, featureGroup);
+    if (gffColumns.length > 6)
     {
-      // search for any dummy seqs that this sequence can be used to update
-      SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p));
-      if (dummyseq != null)
+      float score = Float.NaN;
+      try
       {
-        // dummyseq was created so it could be annotated and referred to in
-        // alignments/codon mappings
-
-        SequenceI mseq = includedseqs.get(p);
-        // mseq is the 'template' imported from the FASTA file which we'll use
-        // to coomplete dummyseq
-        if (dummyseq instanceof SequenceDummy)
-        {
-          // probably have the pattern wrong
-          // idea is that a flyweight proxy for a sequence ID can be created for
-          // 1. stable reference creation
-          // 2. addition of annotation
-          // 3. future replacement by a real sequence
-          // current pattern is to create SequenceDummy objects - a convenience
-          // constructor for a Sequence.
-          // problem is that when promoted to a real sequence, all references
-          // need
-          // to be updated somehow.
-          ((SequenceDummy) dummyseq).become(mseq);
-          includedseqs.set(p, dummyseq); // template is no longer needed
-        }
+        score = Float.parseFloat(gffColumns[6]);
+        // update colourgradient bounds if allowed to
+      } catch (NumberFormatException ex)
+      {
+        // leave as NaN
       }
+      sf.setScore(score);
     }
-    // finally add sequences to the dataset
-    for (SequenceI seq : includedseqs)
+
+    parseDescriptionHTML(sf, removeHTML);
+
+    seq.addSequenceFeature(sf);
+
+    while (seqId != null
+            && (seq = alignment.findName(seq, seqId, false)) != null)
     {
-      align.addSequence(seq);
+      seq.addSequenceFeature(new SequenceFeature(sf));
     }
+    return true;
   }
 
   /**
-   * take a sequence feature and examine its attributes to decide how it should
-   * be added to a sequence
+   * Process a feature type colour specification and store it by feature type
    * 
-   * @param seq
-   *          - the destination sequence constructed or discovered in the
-   *          current context
-   * @param sf
-   *          - the base feature with ATTRIBUTES property containing any
-   *          additional attributes
-   * @param gFFFile
-   *          - true if we are processing a GFF annotation file
-   * @return true if sf was actually added to the sequence, false if it was
-   *         processed in another way
+   * @param line
+   *          the current input line (for error messages only)
+   * @param featureType
+   *          the first token on the line
+   * @param gffColumns
+   *          tokens on the line; the second is the colour specification
+   * @param colours
+   *          map, keyed by feature type, to which a non-null colour is added
    */
-  public boolean processOrAddSeqFeature(AlignmentI align,
-          List<SequenceI> newseqs, SequenceI seq, SequenceFeature sf,
-          boolean gFFFile, boolean relaxedIdMatching)
+  protected void parseFeatureColour(String line, String featureType,
+          String[] gffColumns, Map<String, Object> colours)
   {
-    String attr = (String) sf.getValue("ATTRIBUTES");
-    boolean add = true;
-    if (gFFFile && attr != null)
+    Object colour = null;
+    String colscheme = gffColumns[1];
+    if (colscheme.indexOf("|") > -1
+            || colscheme.trim().equalsIgnoreCase("label"))
     {
-      int nattr = 8;
-
-      for (String attset : attr.split("\t"))
-      {
-        if (attset == null || attset.trim().length() == 0)
-        {
-          continue;
-        }
-        nattr++;
-        Map<String, List<String>> set = new HashMap<String, List<String>>();
-        // normally, only expect one column - 9 - in this field
-        // the attributes (Gff3) or groups (gff2) field
-        for (String pair : attset.trim().split(";"))
-        {
-          pair = pair.trim();
-          if (pair.length() == 0)
-          {
-            continue;
-          }
-
-          // expect either space seperated (gff2) or '=' separated (gff3)
-          // key/value pairs here
-
-          int eqpos = pair.indexOf('='), sppos = pair.indexOf(' ');
-          String key = null, value = null;
-
-          if (sppos > -1 && (eqpos == -1 || sppos < eqpos))
-          {
-            key = pair.substring(0, sppos);
-            value = pair.substring(sppos + 1);
-          }
-          else
-          {
-            if (eqpos > -1 && (sppos == -1 || eqpos < sppos))
-            {
-              key = pair.substring(0, eqpos);
-              value = pair.substring(eqpos + 1);
-            }
-            else
-            {
-              key = pair;
-            }
-          }
-          if (key != null)
-          {
-            List<String> vals = set.get(key);
-            if (vals == null)
-            {
-              vals = new ArrayList<String>();
-              set.put(key, vals);
-            }
-            if (value != null)
-            {
-              vals.add(value.trim());
-            }
-          }
-        }
-        try
-        {
-          add &= processGffKey(set, nattr, seq, sf, align, newseqs,
-                  relaxedIdMatching); // process decides if
-                                      // feature is actually
-                                      // added
-        } catch (InvalidGFF3FieldException ivfe)
-        {
-          System.err.println(ivfe);
-        }
-      }
+      colour = parseGraduatedColourScheme(line, colscheme);
     }
-    if (add)
+    else
     {
-      seq.addSequenceFeature(sf);
+      UserColourScheme ucs = new UserColourScheme(colscheme);
+      colour = ucs.findColour('A');
     }
-    return add;
-  }
-
-  public class InvalidGFF3FieldException extends Exception
-  {
-    String field, value;
-
-    public InvalidGFF3FieldException(String field,
-            Map<String, List<String>> set, String message)
+    if (colour != null)
     {
-      super(message + " (Field was " + field + " and value was "
-              + set.get(field).toString());
-      this.field = field;
-      this.value = set.get(field).toString();
+      colours.put(featureType, colour);
     }
-
   }
 
   /**
-   * take a set of keys for a feature and interpret them
+   * Parse a Jalview graduated colour descriptor
    * 
-   * @param set
-   * @param nattr
-   * @param seq
-   * @param sf
+   * @param line
+   * @param colourDescriptor
    * @return
    */
-  public boolean processGffKey(Map<String, List<String>> set, int nattr,
-          SequenceI seq, SequenceFeature sf, AlignmentI align,
-          List<SequenceI> newseqs, boolean relaxedIdMatching)
-          throws InvalidGFF3FieldException
+  protected GraduatedColor parseGraduatedColourScheme(String line,
+          String colourDescriptor)
   {
-    String attr;
-    // decide how to interpret according to type
-    if (sf.getType().equals("similarity"))
+    // Parse '|' separated graduated colourscheme fields:
+    // [label|][mincolour|maxcolour|[absolute|]minvalue|maxvalue|thresholdtype|thresholdvalue]
+    // Either 'label' is given on its own, or 'label|' is an optional
+    // prefix; mincolour and maxcolour are required (but may be
+    // left blank), 'absolute' is optional, minvalue and maxvalue are
+    // required.
+    // mincolour and maxcolour are each a hexadecimal or word equivalent
+    // colour.
+    // minvalue and maxvalue are values parsed as floats.
+    // thresholdtype is either 'above', 'below', or 'none'.
+    // thresholdvalue is a float value and only required when thresholdtype
+    // is either 'above' or 'below'.
+    StringTokenizer gcol = new StringTokenizer(colourDescriptor, "|", true);
+    // set defaults
+    float min = Float.MIN_VALUE, max = Float.MAX_VALUE;
+    boolean labelCol = false;
+    // Parse spec line
+    String mincol = gcol.nextToken();
+    if (mincol == "|")
     {
-      int strand = sf.getStrand();
-      // exonerate cdna/protein map
-      // look for fields
-      List<SequenceI> querySeq = findNames(align, newseqs,
-              relaxedIdMatching, set.get(attr = "Query"));
-      if (querySeq == null || querySeq.size() != 1)
-      {
-        throw new InvalidGFF3FieldException(attr, set,
-                "Expecting exactly one sequence in Query field (got "
-                        + set.get(attr) + ")");
-      }
-      if (set.containsKey(attr = "Align"))
-      {
-        // process the align maps and create cdna/protein maps
-        // ideally, the query sequences are in the alignment, but maybe not...
-
-        AlignedCodonFrame alco = new AlignedCodonFrame();
-        MapList codonmapping = constructCodonMappingFromAlign(set, attr,
-                strand);
-
-        // add codon mapping, and hope!
-        alco.addMap(seq, querySeq.get(0), codonmapping);
-        align.addCodonFrame(alco);
-        // everything that's needed to be done is done
-        // no features to create here !
-        return false;
-      }
-
+      System.err
+              .println("Expected either 'label' or a colour specification in the line: "
+                      + line);
+      return null;
     }
-    return true;
-  }
-
-  private MapList constructCodonMappingFromAlign(
-          Map<String, List<String>> set, String attr, int strand)
-          throws InvalidGFF3FieldException
-  {
-    if (strand == 0)
+    String maxcol = null;
+    if (mincol.toLowerCase().indexOf("label") == 0)
     {
-      throw new InvalidGFF3FieldException(attr, set,
-              "Invalid strand for a codon mapping (cannot be 0)");
+      labelCol = true;
+      mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null); // skip '|'
+      mincol = (gcol.hasMoreTokens() ? gcol.nextToken() : null);
     }
-    List<Integer> fromrange = new ArrayList<Integer>(), torange = new ArrayList<Integer>();
-    int lastppos = 0, lastpframe = 0;
-    for (String range : set.get(attr))
+    String abso = null, minval, maxval;
+    if (mincol != null)
     {
-      List<Integer> ints = new ArrayList<Integer>();
-      StringTokenizer st = new StringTokenizer(range, " ");
-      while (st.hasMoreTokens())
+      // at least four more tokens
+      if (mincol.equals("|"))
       {
-        String num = st.nextToken();
-        try
-        {
-          ints.add(new Integer(num));
-        } catch (NumberFormatException nfe)
-        {
-          throw new InvalidGFF3FieldException(attr, set,
-                  "Invalid number in field " + num);
-        }
+        mincol = "";
       }
-      // Align positionInRef positionInQuery LengthInRef
-      // contig_1146 exonerate:protein2genome:local similarity 8534 11269
-      // 3652 - . alignment_id 0 ;
-      // Query DDB_G0269124
-      // Align 11270 143 120
-      // corresponds to : 120 bases align at pos 143 in protein to 11270 on
-      // dna in strand direction
-      // Align 11150 187 282
-      // corresponds to : 282 bases align at pos 187 in protein to 11150 on
-      // dna in strand direction
-      //
-      // Align 10865 281 888
-      // Align 9977 578 1068
-      // Align 8909 935 375
-      //
-      if (ints.size() != 3)
+      else
       {
-        throw new InvalidGFF3FieldException(attr, set,
-                "Invalid number of fields for this attribute ("
-                        + ints.size() + ")");
+        gcol.nextToken(); // skip next '|'
       }
-      fromrange.add(new Integer(ints.get(0).intValue()));
-      fromrange.add(new Integer(ints.get(0).intValue() + strand
-              * ints.get(2).intValue()));
-      // how are intron/exon boundaries that do not align in codons
-      // represented
-      if (ints.get(1).equals(lastppos) && lastpframe > 0)
+      // continue parsing rest of line
+      maxcol = gcol.nextToken();
+      if (maxcol.equals("|"))
       {
-        // extend existing to map
-        lastppos += ints.get(2) / 3;
-        lastpframe = ints.get(2) % 3;
-        torange.set(torange.size() - 1, new Integer(lastppos));
+        maxcol = "";
       }
       else
       {
-        // new to map range
-        torange.add(ints.get(1));
-        lastppos = ints.get(1) + ints.get(2) / 3;
-        lastpframe = ints.get(2) % 3;
-        torange.add(new Integer(lastppos));
+        gcol.nextToken(); // skip next '|'
+      }
+      abso = gcol.nextToken();
+      gcol.nextToken(); // skip next '|'
+      if (abso.toLowerCase().indexOf("abso") != 0)
+      {
+        minval = abso;
+        abso = null;
+      }
+      else
+      {
+        minval = gcol.nextToken();
+        gcol.nextToken(); // skip next '|'
+      }
+      maxval = gcol.nextToken();
+      if (gcol.hasMoreTokens())
+      {
+        gcol.nextToken(); // skip next '|'
+      }
+      try
+      {
+        if (minval.length() > 0)
+        {
+          min = Float.valueOf(minval);
+        }
+      } catch (Exception e)
+      {
+        System.err
+                .println("Couldn't parse the minimum value for graduated colour for type ("
+                        + colourDescriptor
+                        + ") - did you misspell 'auto' for the optional automatic colour switch ?");
+        e.printStackTrace();
+      }
+      try
+      {
+        if (maxval.length() > 0)
+        {
+          max = Float.valueOf(maxval);
+        }
+      } catch (Exception e)
+      {
+        System.err
+                .println("Couldn't parse the maximum value for graduated colour for type ("
+                        + colourDescriptor + ")");
+        e.printStackTrace();
       }
     }
-    // from and to ranges must end up being a series of start/end intervals
-    if (fromrange.size() % 2 == 1)
-    {
-      throw new InvalidGFF3FieldException(attr, set,
-              "Couldn't parse the DNA alignment range correctly");
-    }
-    if (torange.size() % 2 == 1)
+    else
     {
-      throw new InvalidGFF3FieldException(attr, set,
-              "Couldn't parse the protein alignment range correctly");
+      // add in some dummy min/max colours for the label-only
+      // colourscheme.
+      mincol = "FFFFFF";
+      maxcol = "000000";
     }
-    // finally, build the map
-    int[] frommap = new int[fromrange.size()], tomap = new int[torange
-            .size()];
-    int p = 0;
-    for (Integer ip : fromrange)
+
+    GraduatedColor colour = null;
+    try
     {
-      frommap[p++] = ip.intValue();
-    }
-    p = 0;
-    for (Integer ip : torange)
+      colour = new GraduatedColor(
+              new UserColourScheme(mincol).findColour('A'),
+              new UserColourScheme(maxcol).findColour('A'), min, max);
+    } catch (Exception e)
     {
-      tomap[p++] = ip.intValue();
+      System.err.println("Couldn't parse the graduated colour scheme ("
+              + colourDescriptor + ")");
+      e.printStackTrace();
     }
-
-    return new MapList(frommap, tomap, 3, 1);
-  }
-
-  private List<SequenceI> findNames(AlignmentI align,
-          List<SequenceI> newseqs, boolean relaxedIdMatching,
-          List<String> list)
-  {
-    List<SequenceI> found = new ArrayList<SequenceI>();
-    for (String seqId : list)
+    if (colour != null)
     {
-      SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs);
-      if (seq != null)
+      colour.setColourByLabel(labelCol);
+      colour.setAutoScaled(abso == null);
+      // add in any additional parameters
+      String ttype = null, tval = null;
+      if (gcol.hasMoreTokens())
+      {
+        // threshold type and possibly a threshold value
+        ttype = gcol.nextToken();
+        if (ttype.toLowerCase().startsWith("below"))
+        {
+          colour.setThreshType(AnnotationColourGradient.BELOW_THRESHOLD);
+        }
+        else if (ttype.toLowerCase().startsWith("above"))
+        {
+          colour.setThreshType(AnnotationColourGradient.ABOVE_THRESHOLD);
+        }
+        else
+        {
+          colour.setThreshType(AnnotationColourGradient.NO_THRESHOLD);
+          if (!ttype.toLowerCase().startsWith("no"))
+          {
+            System.err.println("Ignoring unrecognised threshold type : "
+                    + ttype);
+          }
+        }
+      }
+      if (colour.getThreshType() != AnnotationColourGradient.NO_THRESHOLD)
+      {
+        try
+        {
+          gcol.nextToken();
+          tval = gcol.nextToken();
+          colour.setThresh(new Float(tval).floatValue());
+        } catch (Exception e)
+        {
+          System.err.println("Couldn't parse threshold value as a float: ("
+                  + tval + ")");
+          e.printStackTrace();
+        }
+      }
+      // parse the thresh-is-min token ?
+      if (gcol.hasMoreTokens())
       {
-        found.add(seq);
+        System.err
+                .println("Ignoring additional tokens in parameters in graduated colour specification\n");
+        while (gcol.hasMoreTokens())
+        {
+          System.err.println("|" + gcol.nextToken());
+        }
+        System.err.println("\n");
       }
     }
-    return found;
+    return colour;
   }
 
-  private AlignmentI lastmatchedAl = null;
-
-  private SequenceIdMatcher matcher = null;
-
   /**
    * clear any temporary handles used to speed up ID matching
    */
-  private void resetMatcher()
+  protected void resetMatcher()
   {
     lastmatchedAl = null;
     matcher = null;
   }
 
-  private SequenceI findName(AlignmentI align, String seqId,
-          boolean relaxedIdMatching, List<SequenceI> newseqs)
+  /**
+   * Returns a sequence matching the given id, as follows
+   * <ul>
+   * <li>strict matching is on exact sequence name</li>
+   * <li>relaxed matching allows matching on a token within the sequence name,
+   * or a dbxref</li>
+   * <li>first tries to find a match in the alignment sequences</li>
+   * <li>else tries to find a match in the new sequences already generated while
+   * parsing the features file</li>
+   * <li>else creates a new placeholder sequence, adds it to the new sequences
+   * list, and returns it</li>
+   * </ul>
+   * 
+   * @param seqId
+   * @param align
+   * @param newseqs
+   * @param relaxedIdMatching
+   * 
+   * @return
+   */
+  protected SequenceI findSequence(String seqId, AlignmentI align,
+          List<SequenceI> newseqs, boolean relaxedIdMatching)
   {
+    // TODO encapsulate in SequenceIdMatcher, share the matcher
+    // with the GffHelper (removing code duplication)
     SequenceI match = null;
     if (relaxedIdMatching)
     {
       if (lastmatchedAl != align)
       {
-        matcher = new SequenceIdMatcher(
-                (lastmatchedAl = align).getSequencesArray());
+        lastmatchedAl = align;
+        matcher = new SequenceIdMatcher(align.getSequencesArray());
         if (newseqs != null)
         {
           matcher.addAll(newseqs);
@@ -1159,7 +681,7 @@ public class FeaturesFile extends AlignFile
     {
       return;
     }
-    jalview.util.ParseHtmlBodyAndLinks parsed = new jalview.util.ParseHtmlBodyAndLinks(
+    ParseHtmlBodyAndLinks parsed = new ParseHtmlBodyAndLinks(
             sf.getDescription(), removeHTML, newline);
 
     sf.description = (removeHTML) ? parsed.getNonHtmlContent()
@@ -1174,22 +696,22 @@ public class FeaturesFile extends AlignFile
   /**
    * generate a features file for seqs includes non-pos features by default.
    * 
-   * @param seqs
+   * @param sequences
    *          source of sequence features
    * @param visible
    *          hash of feature types and colours
    * @return features file contents
    */
-  public String printJalviewFormat(SequenceI[] seqs,
+  public String printJalviewFormat(SequenceI[] sequences,
           Map<String, Object> visible)
   {
-    return printJalviewFormat(seqs, visible, true, true);
+    return printJalviewFormat(sequences, visible, true, true);
   }
 
   /**
    * generate a features file for seqs with colours from visible (if any)
    * 
-   * @param seqs
+   * @param sequences
    *          source of features
    * @param visible
    *          hash of Colours for each feature type
@@ -1200,11 +722,10 @@ public class FeaturesFile extends AlignFile
    *          of group or type)
    * @return features file contents
    */
-  public String printJalviewFormat(SequenceI[] seqs, Map visible,
-          boolean visOnly, boolean nonpos)
+  public String printJalviewFormat(SequenceI[] sequences,
+          Map<String, Object> visible, boolean visOnly, boolean nonpos)
   {
-    StringBuffer out = new StringBuffer();
-    SequenceFeature[] next;
+    StringBuilder out = new StringBuilder(256);
     boolean featuresGen = false;
     if (visOnly && !nonpos && (visible == null || visible.size() < 1))
     {
@@ -1217,15 +738,15 @@ public class FeaturesFile extends AlignFile
       // write feature colours only if we're given them and we are generating
       // viewed features
       // TODO: decide if feature links should also be written here ?
-      Iterator en = visible.keySet().iterator();
-      String type, color;
+      Iterator<String> en = visible.keySet().iterator();
+      String featureType, color;
       while (en.hasNext())
       {
-        type = en.next().toString();
+        featureType = en.next().toString();
 
-        if (visible.get(type) instanceof GraduatedColor)
+        if (visible.get(featureType) instanceof GraduatedColor)
         {
-          GraduatedColor gc = (GraduatedColor) visible.get(type);
+          GraduatedColor gc = (GraduatedColor) visible.get(featureType);
           color = (gc.isColourByLabel() ? "label|" : "")
                   + Format.getHexString(gc.getMinColor()) + "|"
                   + Format.getHexString(gc.getMaxColor())
@@ -1254,46 +775,47 @@ public class FeaturesFile extends AlignFile
             color += "none";
           }
         }
-        else if (visible.get(type) instanceof java.awt.Color)
+        else if (visible.get(featureType) instanceof Color)
         {
-          color = Format.getHexString((java.awt.Color) visible.get(type));
+          color = Format.getHexString((Color) visible.get(featureType));
         }
         else
         {
           // legacy support for integer objects containing colour triplet values
-          color = Format.getHexString(new java.awt.Color(Integer
-                  .parseInt(visible.get(type).toString())));
+          color = Format.getHexString(new Color(Integer.parseInt(visible
+                  .get(featureType).toString())));
         }
-        out.append(type);
-        out.append("\t");
+        out.append(featureType);
+        out.append(TAB);
         out.append(color);
         out.append(newline);
       }
     }
     // Work out which groups are both present and visible
-    Vector groups = new Vector();
+    List<String> groups = new ArrayList<String>();
     int groupIndex = 0;
     boolean isnonpos = false;
 
-    for (int i = 0; i < seqs.length; i++)
+    SequenceFeature[] features;
+    for (int i = 0; i < sequences.length; i++)
     {
-      next = seqs[i].getSequenceFeatures();
-      if (next != null)
+      features = sequences[i].getSequenceFeatures();
+      if (features != null)
       {
-        for (int j = 0; j < next.length; j++)
+        for (int j = 0; j < features.length; j++)
         {
-          isnonpos = next[j].begin == 0 && next[j].end == 0;
+          isnonpos = features[j].begin == 0 && features[j].end == 0;
           if ((!nonpos && isnonpos)
                   || (!isnonpos && visOnly && !visible
-                          .containsKey(next[j].type)))
+                          .containsKey(features[j].type)))
           {
             continue;
           }
 
-          if (next[j].featureGroup != null
-                  && !groups.contains(next[j].featureGroup))
+          if (features[j].featureGroup != null
+                  && !groups.contains(features[j].featureGroup))
           {
-            groups.addElement(next[j].featureGroup);
+            groups.add(features[j].featureGroup);
           }
         }
       }
@@ -1302,12 +824,11 @@ public class FeaturesFile extends AlignFile
     String group = null;
     do
     {
-
       if (groups.size() > 0 && groupIndex < groups.size())
       {
-        group = groups.elementAt(groupIndex).toString();
+        group = groups.get(groupIndex);
         out.append(newline);
-        out.append("STARTGROUP\t");
+        out.append("STARTGROUP").append(TAB);
         out.append(group);
         out.append(newline);
       }
@@ -1316,17 +837,17 @@ public class FeaturesFile extends AlignFile
         group = null;
       }
 
-      for (int i = 0; i < seqs.length; i++)
+      for (int i = 0; i < sequences.length; i++)
       {
-        next = seqs[i].getSequenceFeatures();
-        if (next != null)
+        features = sequences[i].getSequenceFeatures();
+        if (features != null)
         {
-          for (int j = 0; j < next.length; j++)
+          for (int j = 0; j < features.length; j++)
           {
-            isnonpos = next[j].begin == 0 && next[j].end == 0;
+            isnonpos = features[j].begin == 0 && features[j].end == 0;
             if ((!nonpos && isnonpos)
                     || (!isnonpos && visOnly && !visible
-                            .containsKey(next[j].type)))
+                            .containsKey(features[j].type)))
             {
               // skip if feature is nonpos and we ignore them or if we only
               // output visible and it isn't non-pos and it's not visible
@@ -1334,65 +855,65 @@ public class FeaturesFile extends AlignFile
             }
 
             if (group != null
-                    && (next[j].featureGroup == null || !next[j].featureGroup
+                    && (features[j].featureGroup == null || !features[j].featureGroup
                             .equals(group)))
             {
               continue;
             }
 
-            if (group == null && next[j].featureGroup != null)
+            if (group == null && features[j].featureGroup != null)
             {
               continue;
             }
             // we have features to output
             featuresGen = true;
-            if (next[j].description == null
-                    || next[j].description.equals(""))
+            if (features[j].description == null
+                    || features[j].description.equals(""))
             {
-              out.append(next[j].type + "\t");
+              out.append(features[j].type).append(TAB);
             }
             else
             {
-              if (next[j].links != null
-                      && next[j].getDescription().indexOf("<html>") == -1)
+              if (features[j].links != null
+                      && features[j].getDescription().indexOf("<html>") == -1)
               {
                 out.append("<html>");
               }
 
-              out.append(next[j].description + " ");
-              if (next[j].links != null)
+              out.append(features[j].description + " ");
+              if (features[j].links != null)
               {
-                for (int l = 0; l < next[j].links.size(); l++)
+                for (int l = 0; l < features[j].links.size(); l++)
                 {
-                  String label = next[j].links.elementAt(l).toString();
+                  String label = features[j].links.elementAt(l).toString();
                   String href = label.substring(label.indexOf("|") + 1);
                   label = label.substring(0, label.indexOf("|"));
 
-                  if (next[j].description.indexOf(href) == -1)
+                  if (features[j].description.indexOf(href) == -1)
                   {
                     out.append("<a href=\"" + href + "\">" + label + "</a>");
                   }
                 }
 
-                if (next[j].getDescription().indexOf("</html>") == -1)
+                if (features[j].getDescription().indexOf("</html>") == -1)
                 {
                   out.append("</html>");
                 }
               }
 
-              out.append("\t");
+              out.append(TAB);
             }
-            out.append(seqs[i].getName());
+            out.append(sequences[i].getName());
             out.append("\t-1\t");
-            out.append(next[j].begin);
-            out.append("\t");
-            out.append(next[j].end);
-            out.append("\t");
-            out.append(next[j].type);
-            if (!Float.isNaN(next[j].score))
+            out.append(features[j].begin);
+            out.append(TAB);
+            out.append(features[j].end);
+            out.append(TAB);
+            out.append(features[j].type);
+            if (!Float.isNaN(features[j].score))
             {
-              out.append("\t");
-              out.append(next[j].score);
+              out.append(TAB);
+              out.append(features[j].score);
             }
             out.append(newline);
           }
@@ -1401,7 +922,7 @@ public class FeaturesFile extends AlignFile
 
       if (group != null)
       {
-        out.append("ENDGROUP\t");
+        out.append("ENDGROUP").append(TAB);
         out.append(group);
         out.append(newline);
         groupIndex++;
@@ -1422,110 +943,492 @@ public class FeaturesFile extends AlignFile
   }
 
   /**
-   * generate a gff file for sequence features includes non-pos features by
-   * default.
+   * Parse method that is called when a GFF file is dragged to the desktop
+   */
+  @Override
+  public void parse()
+  {
+    final AlignViewportI viewport = getViewport();
+    final boolean noViewport = (viewport == null);
+    if (noViewport)
+    {
+      // nothing to attach to: collect parsed data in a new, empty alignment
+      dataset = new Alignment(new SequenceI[] {});
+    }
+    else
+    {
+      if (viewport.getAlignment() != null)
+      {
+        dataset = viewport.getAlignment().getDataset();
+      }
+      if (dataset == null)
+      {
+        // no dataset held (working in the applet context ?) - fall back to
+        // the viewport's alignment
+        dataset = viewport.getAlignment();
+      }
+    }
+
+    // TODO the boolean result of the parse is not yet acted on - pass error
+    // up somehow
+    parse(dataset, null, false, true);
+
+    if (noViewport)
+    {
+      setSeqs(dataset.getSequencesArray());
+    }
+    // TODO else update viewport with the dataset data ?
+  }
+
+  /**
+   * Implements an abstract method that is not used for features output; no
+   * file content is generated here.
+   * 
+   * @return a fixed message naming the methods to call instead
+   */
+  @Override
+  public String print()
+  {
+    final String advice = "Use printGffFormat() or printJalviewFormat()";
+    return advice;
+  }
+
+  /**
+   * Returns features output in GFF2 format, including hidden and non-positional
+   * features
    * 
-   * @param seqs
+   * @param sequences
+   *          the sequences whose features are to be output
    * @param visible
+   *          a map whose keys are the type names of visible features
    * @return
    */
-  public String printGFFFormat(SequenceI[] seqs, Map<String, Object> visible)
+  public String printGffFormat(SequenceI[] sequences,
+          Map<String, Object> visible)
   {
-    return printGFFFormat(seqs, visible, true, true);
+    return printGffFormat(sequences, visible, true, true);
   }
 
-  public String printGFFFormat(SequenceI[] seqs,
-          Map<String, Object> visible, boolean visOnly, boolean nonpos)
+  /**
+   * Returns features output in GFF2 format
+   * 
+   * @param sequences
+   *          the sequences whose features are to be output
+   * @param visible
+   *          a map whose keys are the type names of visible features
+   * @param outputVisibleOnly
+   * @param includeNonPositionalFeatures
+   * @return
+   */
+  public String printGffFormat(SequenceI[] sequences,
+          Map<String, Object> visible, boolean outputVisibleOnly,
+          boolean includeNonPositionalFeatures)
   {
-    StringBuffer out = new StringBuffer();
-    SequenceFeature[] next;
+    StringBuilder out = new StringBuilder(256);
+    out.append(String.format("%s %d\n", GFF_VERSION, gffVersion));
     String source;
     boolean isnonpos;
-    for (int i = 0; i < seqs.length; i++)
+    for (SequenceI seq : sequences)
     {
-      if (seqs[i].getSequenceFeatures() != null)
+      SequenceFeature[] features = seq.getSequenceFeatures();
+      if (features != null)
       {
-        next = seqs[i].getSequenceFeatures();
-        for (int j = 0; j < next.length; j++)
+        for (SequenceFeature sf : features)
         {
-          isnonpos = next[j].begin == 0 && next[j].end == 0;
-          if ((!nonpos && isnonpos)
-                  || (!isnonpos && visOnly && !visible
-                          .containsKey(next[j].type)))
+          isnonpos = sf.begin == 0 && sf.end == 0;
+          if (!includeNonPositionalFeatures && isnonpos)
+          {
+            /*
+             * ignore non-positional features if not wanted
+             */
+            continue;
+          }
+          // TODO why the test !isnonpos here?
+          // what about not visible non-positional features?
+          if (!isnonpos && outputVisibleOnly
+                  && !visible.containsKey(sf.type))
           {
+            /*
+             * ignore not visible features if not wanted
+             */
             continue;
           }
 
-          source = next[j].featureGroup;
+          source = sf.featureGroup;
           if (source == null)
           {
-            source = next[j].getDescription();
+            source = sf.getDescription();
           }
 
-          out.append(seqs[i].getName());
-          out.append("\t");
+          out.append(seq.getName());
+          out.append(TAB);
           out.append(source);
-          out.append("\t");
-          out.append(next[j].type);
-          out.append("\t");
-          out.append(next[j].begin);
-          out.append("\t");
-          out.append(next[j].end);
-          out.append("\t");
-          out.append(next[j].score);
-          out.append("\t");
-
-          if (next[j].getValue("STRAND") != null)
-          {
-            out.append(next[j].getValue("STRAND"));
-            out.append("\t");
-          }
-          else
+          out.append(TAB);
+          out.append(sf.type);
+          out.append(TAB);
+          out.append(sf.begin);
+          out.append(TAB);
+          out.append(sf.end);
+          out.append(TAB);
+          out.append(sf.score);
+          out.append(TAB);
+
+          int strand = sf.getStrand();
+          out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
+          out.append(TAB);
+
+          String phase = sf.getPhase();
+          out.append(phase == null ? "." : phase);
+
+          // miscellaneous key-values (GFF column 9)
+          String attributes = sf.getAttributes();
+          if (attributes != null)
           {
-            out.append(".\t");
+            out.append(TAB).append(attributes);
           }
 
-          if (next[j].getValue("FRAME") != null)
-          {
-            out.append(next[j].getValue("FRAME"));
-          }
-          else
-          {
-            out.append(".");
-          }
-          // TODO: verify/check GFF - should there be a /t here before attribute
-          // output ?
+          out.append(newline);
+        }
+      }
+    }
 
-          if (next[j].getValue("ATTRIBUTES") != null)
-          {
-            out.append(next[j].getValue("ATTRIBUTES"));
-          }
+    return out.toString();
+  }
 
-          out.append(newline);
+  /**
+   * Returns a mapping given list of one or more Align descriptors (exonerate
+   * format)
+   * 
+   * @param alignedRegions
+   *          a list of aligned regions, each holding three whitespace
+   *          separated integers "fromStart toStart fromCount"
+   * @param mapIsFromCdna
+   *          if true, 'from' is dna, else 'from' is protein
+   * @param strand
+   *          either 1 (forward) or -1 (reverse)
+   * @return a MapList built from the [start, end] ranges of every region
+   * @throws IOException
+   *           if strand is 0, or if a region does not consist of exactly
+   *           three integer fields
+   */
+  protected MapList constructCodonMappingFromAlign(
+          List<String> alignedRegions, boolean mapIsFromCdna, int strand)
+          throws IOException
+  {
+    if (strand == 0)
+    {
+      throw new IOException(
+              "Invalid strand for a codon mapping (cannot be 0)");
+    }
+    int regions = alignedRegions.size();
+    // arrays to hold [start, end] for each aligned region
+    int[] fromRanges = new int[regions * 2]; // from dna
+    int[] toRanges = new int[regions * 2]; // to protein
+    int fromRangesIndex = 0;
+    int toRangesIndex = 0;
+
+    for (String range : alignedRegions)
+    {
+      /*
+       * Align mapFromStart mapToStart mapFromCount
+       * e.g. if mapIsFromCdna
+       *     Align 11270 143 120
+       * means:
+       *     120 bases from pos 11270 align to pos 143 in peptide
+       * if !mapIsFromCdna this would instead be
+       *     Align 143 11270 40
+       */
+      // split on runs of whitespace, ignoring any at either end, so that
+      // padded or multiply-spaced fields are still read as three numbers
+      String[] tokens = range.trim().split("\\s+");
+      if (tokens.length != 3)
+      {
+        throw new IOException("Wrong number of fields for Align");
+      }
+      int fromStart = 0;
+      int toStart = 0;
+      int fromCount = 0;
+      try
+      {
+        fromStart = Integer.parseInt(tokens[0]);
+        toStart = Integer.parseInt(tokens[1]);
+        fromCount = Integer.parseInt(tokens[2]);
+      } catch (NumberFormatException nfe)
+      {
+        // keep the original exception as the cause for diagnosis
+        throw new IOException("Invalid number in Align field: "
+                + nfe.getMessage(), nfe);
+      }
+
+      /*
+       * Jalview always models from dna to protein, so adjust values if the
+       * GFF mapping is from protein to dna
+       */
+      if (!mapIsFromCdna)
+      {
+        fromCount *= 3;
+        int temp = fromStart;
+        fromStart = toStart;
+        toStart = temp;
+      }
+      fromRanges[fromRangesIndex++] = fromStart;
+      fromRanges[fromRangesIndex++] = fromStart + strand * (fromCount - 1);
+
+      /*
+       * If a codon has an intron gap, there will be contiguous 'toRanges';
+       * this is handled for us by the MapList constructor.
+       * (It is not clear that exonerate ever generates this case)
+       */
+      toRanges[toRangesIndex++] = toStart;
+      toRanges[toRangesIndex++] = toStart + (fromCount - 1) / 3;
+    }
+
+    return new MapList(fromRanges, toRanges, 3, 1);
+  }
+
+  /**
+   * Parse a GFF format feature. This may include creating a 'dummy' sequence to
+   * hold the feature, or for its mapped sequence, or both, to be resolved
+   * either later in the GFF file (##FASTA section), or when the user loads
+   * additional sequences.
+   * <p>
+   * If the GFF helper returns a feature, it is added to the located sequence,
+   * and a copy of it is added to each further sequence in the alignment found
+   * by name for the same sequence id.
+   * 
+   * @param gffColumns
+   *          the columns of one GFF line (seqid source type start end score
+   *          strand phase [attributes]); at least 5 are required
+   * @param alignment
+   *          the alignment searched for sequences matching the sequence id
+   * @param relaxedIdMatching
+   *          passed on to the sequence lookup and to the GFF helper
+   * @param newseqs
+   *          passed on to the sequence lookup and to the GFF helper
+   * @return the sequence located for the sequence id in column 1, or null if
+   *         the line has fewer than 5 columns or the helper failed with an
+   *         IOException
+   */
+  protected SequenceI parseGff(String[] gffColumns, AlignmentI alignment,
+          boolean relaxedIdMatching, List<SequenceI> newseqs)
+  {
+    /*
+     * GFF: seqid source type start end score strand phase [attributes]
+     */
+    if (gffColumns.length < 5)
+    {
+      System.err.println("Ignoring GFF feature line with too few columns ("
+              + gffColumns.length + ")");
+      return null;
+    }
 
+    /*
+     * locate referenced sequence in alignment _or_ 
+     * as a forward or external reference (SequenceDummy)
+     */
+    String seqId = gffColumns[0];
+    SequenceI seq = findSequence(seqId, alignment, newseqs,
+            relaxedIdMatching);
+
+    SequenceFeature sf = null;
+    GffHelperI helper = GffHelperFactory.getHelper(gffColumns);
+    if (helper != null)
+    {
+      try
+      {
+        sf = helper.processGff(seq, gffColumns, alignment, newseqs,
+                relaxedIdMatching);
+        if (sf != null)
+        {
+          seq.addSequenceFeature(sf);
+          /*
+           * add a copy of the feature to any other sequences found for this
+           * id; a separate loop variable is used so that 'seq' is not left
+           * null (and returned as null) once the search is exhausted
+           */
+          SequenceI other = seq;
+          while ((other = alignment.findName(other, seqId, true)) != null)
+          {
+            other.addSequenceFeature(new SequenceFeature(sf));
+          }
         }
+      } catch (IOException e)
+      {
+        System.err.println("GFF parsing failed with: " + e.getMessage());
+        return null;
       }
     }
 
-    return out.toString();
+    return seq;
   }
 
   /**
-   * this is only for the benefit of object polymorphism - method does nothing.
+   * Process the 'column 9' data of the GFF file. This is less formally defined,
+   * and its interpretation will vary depending on the tool that has generated
+   * it. The raw text is stored as the feature's attributes; each parsed
+   * attribute name is then stored as a feature value holding that name's
+   * values joined with "; ". The values of the attribute whose name equals the
+   * NOTE constant are also used as the feature's description.
+   * 
+   * @param attributes
+   *          the column 9 text; handed to GffHelperBase.parseNameValuePairs
+   *          with ";", a name-value separator ('=' if gffVersion is 3, else a
+   *          space) and "," (presumably the pair delimiter and the value list
+   *          delimiter respectively - confirm against GffHelperBase)
+   * @param sf
+   *          the sequence feature to update
    */
-  public void parse()
+  protected void processGffColumnNine(String attributes, SequenceFeature sf)
   {
-    // IGNORED
+    sf.setAttributes(attributes);
+
+    /*
+     * Parse attributes in column 9 and add them to the sequence feature's 
+     * 'otherData' table; use Note as a best proxy for description
+     */
+    char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
+    // TODO check we don't break GFF2 values which include commas here
+    Map<String, List<String>> nameValues = GffHelperBase
+            .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
+    for (Entry<String, List<String>> attr : nameValues.entrySet())
+    {
+      String values = StringUtils.listToDelimitedString(attr.getValue(),
+              "; ");
+      sf.setValue(attr.getKey(), values);
+      if (NOTE.equals(attr.getKey()))
+      {
+        sf.setDescription(values);
+      }
+    }
   }
 
   /**
-   * this is only for the benefit of object polymorphism - method does nothing.
+   * After encountering ##fasta in a GFF3 file, process the remainder of the
+   * file as FASTA sequence data. Any placeholder (SequenceDummy) sequences
+   * created during feature parsing that match a parsed sequence are updated
+   * with the actual sequence and removed from newseqs; all the resulting
+   * sequences are then added to the alignment. An IOException thrown by the
+   * initial mark() call is ignored.
    * 
-   * @return error message
+   * @param align
+   *          the alignment to which the sequences are added, and whose codon
+   *          frame mappings are updated for each realised placeholder
+   * @param newseqs
+   *          the sequences searched for placeholders matching the parsed
+   *          sequences; realised placeholders are removed from this list
+   * @throws IOException
+   *           declared by this method; NOTE(review): presumably raised while
+   *           reading the FASTA data - confirm against FastaFile
    */
-  public String print()
+  protected void processAsFasta(AlignmentI align, List<SequenceI> newseqs)
+          throws IOException
   {
-    return "USE printGFFFormat() or printJalviewFormat()";
+    try
+    {
+      mark();
+    } catch (IOException q)
+    {
+    }
+    FastaFile parser = new FastaFile(this);
+    List<SequenceI> includedseqs = parser.getSeqs();
+
+    SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
+
+    /*
+     * iterate over includedseqs, replacing matching ones with newseqs
+     * sequences. Generic iterator not used here because we modify
+     * includedseqs as we go
+     */
+    for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+    {
+      // search for any dummy seqs that this sequence can be used to update
+      SequenceI includedSeq = includedseqs.get(p);
+      SequenceI dummyseq = smatcher.findIdMatch(includedSeq);
+      if (dummyseq != null && dummyseq instanceof SequenceDummy)
+      {
+        // probably have the pattern wrong
+        // idea is that a flyweight proxy for a sequence ID can be created for
+        // 1. stable reference creation
+        // 2. addition of annotation
+        // 3. future replacement by a real sequence
+        // current pattern is to create SequenceDummy objects - a convenience
+        // constructor for a Sequence.
+        // problem is that when promoted to a real sequence, all references
+        // need to be updated somehow. We avoid that by keeping the same object.
+        ((SequenceDummy) dummyseq).become(includedSeq);
+        dummyseq.createDatasetSequence();
+
+        /*
+         * Update mappings so they are now to the dataset sequence
+         */
+        for (AlignedCodonFrame mapping : align.getCodonFrames())
+        {
+          mapping.updateToDataset(dummyseq);
+        }
+
+        /*
+         * replace parsed sequence with the realised forward reference
+         */
+        includedseqs.set(p, dummyseq);
+
+        /*
+         * and remove from the newseqs list
+         */
+        newseqs.remove(dummyseq);
+      }
+    }
+
+    /*
+     * finally add sequences to the dataset
+     */
+    for (SequenceI seq : includedseqs)
+    {
+      // experimental: mapping-based 'alignment' to query sequence
+      AlignmentUtils.alignSequenceAs(seq, align,
+              String.valueOf(align.getGapCharacter()), false, true);
+
+      // rename sequences if GFF handler requested this (the new name is read
+      // from the first sequence feature only)
+      // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ?
+      SequenceFeature[] sfs = seq.getSequenceFeatures();
+      if (sfs != null)
+      {
+        String newName = (String) sfs[0].getValue(GffHelperI.RENAME_TOKEN);
+        if (newName != null)
+        {
+          seq.setName(newName);
+        }
+      }
+      align.addSequence(seq);
+    }
   }
 
+  /**
+   * Process a ## directive (GFF pragma). The pragma name is the first
+   * space-delimited token after the leading two characters, and its value is
+   * the second token (or null if there is none). Names are matched ignoring
+   * case:
+   * <ul>
+   * <li>gff-version sets the gffVersion field to the leading (major) number of
+   * the value; a value that is not a number is ignored</li>
+   * <li>species-build stores the value in gffProps</li>
+   * <li>fasta processes the rest of the file as FASTA data</li>
+   * <li>sequence-region, feature-ontology, attribute-ontology and
+   * source-ontology are recognised but not acted on</li>
+   * <li>anything else is reported on stderr and ignored</li>
+   * </ul>
+   * A line consisting only of ### returns without further processing.
+   * 
+   * @param line
+   *          the directive line; the first two characters are skipped, so it
+   *          is presumably expected to start with "##" - TODO confirm with
+   *          caller
+   * @param gffProps
+   *          map in which the species-build value is saved
+   * @param align
+   *          passed to processAsFasta for a fasta pragma
+   * @param newseqs
+   *          passed to processAsFasta for a fasta pragma
+   * @throws IOException
+   *           declared by this method; propagated from processAsFasta
+   */
+  protected void processGffPragma(String line,
+          Map<String, String> gffProps, AlignmentI align,
+          List<SequenceI> newseqs) throws IOException
+  {
+    line = line.trim();
+    if ("###".equals(line))
+    {
+      // close off any open 'forward references'
+      return;
+    }
+
+    String[] tokens = line.substring(2).split(" ");
+    String pragma = tokens[0];
+    String value = tokens.length == 1 ? null : tokens[1];
+
+    if ("gff-version".equalsIgnoreCase(pragma))
+    {
+      if (value != null)
+      {
+        try
+        {
+          // value may be e.g. "3.1.2"
+          gffVersion = Integer.parseInt(value.split("\\.")[0]);
+        } catch (NumberFormatException e)
+        {
+          // ignore
+        }
+      }
+    }
+    else if ("sequence-region".equalsIgnoreCase(pragma))
+    {
+      // could capture <seqid start end> if wanted here
+    }
+    else if ("feature-ontology".equalsIgnoreCase(pragma))
+    {
+      // should resolve against the specified feature ontology URI
+    }
+    else if ("attribute-ontology".equalsIgnoreCase(pragma))
+    {
+      // URI of attribute ontology - not currently used in GFF3
+    }
+    else if ("source-ontology".equalsIgnoreCase(pragma))
+    {
+      // URI of source ontology - not currently used in GFF3
+    }
+    else if ("species-build".equalsIgnoreCase(pragma))
+    {
+      // save URI of specific NCBI taxon version of annotations
+      gffProps.put("species-build", value);
+    }
+    else if ("fasta".equalsIgnoreCase(pragma))
+    {
+      // process the rest of the file as a fasta file and replace any dummy
+      // sequence IDs
+      processAsFasta(align, newseqs);
+    }
+    else
+    {
+      System.err.println("Ignoring unknown pragma: " + line);
+    }
+  }
 }
index 11c40c3..b1944b4 100755 (executable)
@@ -22,6 +22,7 @@ package jalview.io;
 
 import jalview.api.ComplexAlignFile;
 import jalview.api.FeaturesDisplayedI;
+import jalview.api.FeaturesSourceI;
 import jalview.bin.Jalview;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.ColumnSelection;
@@ -100,6 +101,7 @@ public class FileLoader implements Runnable
 
     SwingUtilities.invokeLater(new Runnable()
     {
+      @Override
       public void run()
       {
         loader.start();
@@ -234,6 +236,7 @@ public class FileLoader implements Runnable
     }
   }
 
+  @Override
   public void run()
   {
     String title = protocol.equals(AppletFormatAdapter.PASTE) ? "Copied From Clipboard"
@@ -250,14 +253,14 @@ public class FileLoader implements Runnable
         // just in case the caller didn't identify the file for us
         if (source != null)
         {
-          format = new IdentifyFile().Identify(source, false); // identify
+          format = new IdentifyFile().identify(source, false); // identify
           // stream and
           // rewind rather
           // than close
         }
         else
         {
-          format = new IdentifyFile().Identify(file, protocol);
+          format = new IdentifyFile().identify(file, protocol);
         }
 
       }
@@ -374,18 +377,15 @@ public class FileLoader implements Runnable
                       .getColumnSelection();
               SequenceI[] hiddenSeqs = ((ComplexAlignFile) source)
                       .getHiddenSequences();
-              boolean showSeqFeatures = ((ComplexAlignFile) source)
-                      .isShowSeqFeatures();
               String colourSchemeName = ((ComplexAlignFile) source)
                       .getGlobalColourScheme();
               FeaturesDisplayedI fd = ((ComplexAlignFile) source)
                       .getDisplayedFeatures();
               alignFrame = new AlignFrame(al, hiddenSeqs, colSel,
                       AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
-
-              alignFrame.getViewport().setShowSequenceFeatures(
-                      showSeqFeatures);
               alignFrame.getViewport().setFeaturesDisplayed(fd);
+              alignFrame.getViewport().setShowSequenceFeatures(
+                      ((ComplexAlignFile) source).isShowSeqFeatures());
               ColourSchemeI cs = ColourSchemeMapper.getJalviewColourScheme(
                       colourSchemeName, al);
               if (cs != null)
@@ -397,6 +397,10 @@ public class FileLoader implements Runnable
             {
               alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH,
                       AlignFrame.DEFAULT_HEIGHT);
+              if (source instanceof FeaturesSourceI)
+              {
+                alignFrame.getViewport().setShowSequenceFeatures(true);
+              }
             }
             // add metadata and update ui
             if (!protocol.equals(AppletFormatAdapter.PASTE))
@@ -446,6 +450,7 @@ public class FileLoader implements Runnable
           {
             javax.swing.SwingUtilities.invokeLater(new Runnable()
             {
+              @Override
               public void run()
               {
                 JOptionPane.showInternalMessageDialog(Desktop.desktop,
@@ -472,6 +477,7 @@ public class FileLoader implements Runnable
       {
         javax.swing.SwingUtilities.invokeLater(new Runnable()
         {
+          @Override
           public void run()
           {
             javax.swing.JOptionPane.showInternalMessageDialog(
@@ -493,6 +499,7 @@ public class FileLoader implements Runnable
       {
         javax.swing.SwingUtilities.invokeLater(new Runnable()
         {
+          @Override
           public void run()
           {
             javax.swing.JOptionPane.showInternalMessageDialog(
@@ -551,6 +558,7 @@ public class FileLoader implements Runnable
    * 
    * @see java.lang.Object#finalize()
    */
+  @Override
   protected void finalize() throws Throwable
   {
     source = null;
index 405363d..61f5127 100755 (executable)
@@ -274,6 +274,30 @@ public class FileParse
   }
 
   /**
+   * Not for general use: creates a FileParse object for an existing reader,
+   * with configurable values for the origin and the type of the source. No
+   * input file is associated with the object and its error flag is cleared. If
+   * the reader supports marking, it is marked with a read-ahead limit of
+   * READAHEAD_LIMIT; an IOException raised while marking is ignored.
+   * 
+   * @param source
+   *          the reader used as the data input
+   * @param originString
+   *          stored as the data name
+   * @param typeString
+   *          stored as the source type
+   */
+  public FileParse(BufferedReader source, String originString,
+          String typeString)
+  {
+    type = typeString;
+    error = false;
+    inFile = null;
+    dataName = originString;
+    dataIn = source;
+    try
+    {
+      if (dataIn.markSupported())
+      {
+        dataIn.mark(READAHEAD_LIMIT);
+      }
+    } catch (IOException q)
+    {
+
+    }
+  }
+
+  /**
    * Create a datasource for input to Jalview. See AppletFormatAdapter for the
    * types of sources that are handled.
    * 
@@ -457,11 +481,19 @@ public class FileParse
   }
 
   /**
-   * rewinds the datasource the beginning.
+   * Rewinds the datasource to the marked point if possible
+   * 
+   * @param bytesRead
    * 
    */
-  public void reset() throws IOException
+  public void reset(int bytesRead) throws IOException
   {
+    if (bytesRead >= READAHEAD_LIMIT)
+    {
+      System.err.println(String.format(
+              "File reset error: read %d bytes but reset limit is %d",
+              bytesRead, READAHEAD_LIMIT));
+    }
     if (dataIn != null && !error)
     {
       dataIn.reset();
diff --git a/src/jalview/io/Gff3File.java b/src/jalview/io/Gff3File.java
deleted file mode 100644 (file)
index 248fa09..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- * 
- * This file is part of Jalview.
- * 
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *  
- * Jalview is distributed in the hope that it will be useful, but 
- * WITHOUT ANY WARRANTY; without even the implied warranty 
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
- * PURPOSE.  See the GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.io;
-
-import jalview.api.AlignViewportI;
-import jalview.datamodel.AlignedCodonFrame;
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.SequenceI;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * A GFF3 File parsing wrapper for the tangled mess that is FeaturesFile.
- * 
- * This class implements the methods relied on by FileLoader/FormatAdapter in
- * order to allow them to load alignments directly from GFF2 and GFF3 files that
- * contain sequence data and alignment information.
- * 
- * Major issues:
- * 
- * 1. GFF3 files commonly include mappings between DNA, RNA and Protein - so
- * this class needs a dataset AlignmentI context to create alignment codon
- * mappings.
- * 
- * 2. A single GFF3 file can generate many distinct alignments. Support will be
- * needed to allow several AlignmentI instances to be generated from a single
- * file.
- * 
- * 
- * @author jprocter
- *
- */
-public class Gff3File extends FeaturesFile
-{
-
-  /**
-   * 
-   */
-  public Gff3File()
-  {
-    super();
-  }
-
-  /**
-   * @param source
-   * @throws IOException
-   */
-  public Gff3File(FileParse source) throws IOException
-  {
-    super(source);
-  }
-
-  /**
-   * @param inFile
-   * @param type
-   * @throws IOException
-   */
-  public Gff3File(String inFile, String type) throws IOException
-  {
-    super(inFile, type);
-  }
-
-  /**
-   * @param parseImmediately
-   * @param source
-   * @throws IOException
-   */
-  public Gff3File(boolean parseImmediately, FileParse source)
-          throws IOException
-  {
-    super(parseImmediately, source);
-  }
-
-  /**
-   * @param parseImmediately
-   * @param inFile
-   * @param type
-   * @throws IOException
-   */
-  public Gff3File(boolean parseImmediately, String inFile, String type)
-          throws IOException
-  {
-    super(parseImmediately, inFile, type);
-  }
-
-  /*
-   * (non-Javadoc)
-   * 
-   * @see jalview.io.FeaturesFile#print()
-   */
-  @Override
-  public String print()
-  {
-    // TODO GFF3 writer with sensible defaults for writing alignment data
-
-    // return super.printGFFFormat(seqs, visible);
-    return ("Not yet implemented.");
-  }
-
-  AlignmentI dataset;
-
-  List<AlignmentI> alignments;
-
-  @Override
-  public void parse()
-  {
-    AlignViewportI av = getViewport();
-    if (av != null)
-    {
-      if (av.getAlignment() != null)
-      {
-        dataset = av.getAlignment().getDataset();
-      }
-      if (dataset == null)
-      {
-        // working in the applet context ?
-        dataset = av.getAlignment();
-      }
-    }
-    else
-    {
-      dataset = new Alignment(new SequenceI[] {});
-    }
-
-    boolean parseResult = parse(dataset, null, null, false, true);
-    if (!parseResult)
-    {
-      // pass error up somehow
-    }
-    if (av != null)
-    {
-      // update viewport with the dataset data ?
-    }
-    else
-    {
-      setSeqs(dataset.getSequencesArray());
-    }
-
-  }
-
-  @Override
-  public void addProperties(AlignmentI al)
-  {
-    super.addProperties(al);
-    if (dataset.getCodonFrames() != null)
-    {
-      AlignmentI ds = (al.getDataset() == null) ? al : al.getDataset();
-      for (AlignedCodonFrame codons : dataset.getCodonFrames())
-      {
-        ds.addCodonFrame(codons);
-      }
-    }
-  }
-}
index aec0540..40e9390 100755 (executable)
@@ -30,7 +30,7 @@ import java.io.IOException;
  */
 public class IdentifyFile
 {
-  public static final String GFF3File = "GFF v2 or v3";
+  public static final String FeaturesFile = "GFF or Jalview features";
 
   /**
    * Identify a datasource's file content.
@@ -44,7 +44,7 @@ public class IdentifyFile
    *          DOCUMENT ME!
    * @return ID String
    */
-  public String Identify(String file, String protocol)
+  public String identify(String file, String protocol)
   {
     String emessage = "UNIDENTIFIED FILE PARSING ERROR";
     FileParse parser = null;
@@ -53,7 +53,7 @@ public class IdentifyFile
       parser = new FileParse(file, protocol);
       if (parser.isValid())
       {
-        return Identify(parser);
+        return identify(parser);
       }
     } catch (Exception e)
     {
@@ -68,9 +68,9 @@ public class IdentifyFile
     return emessage;
   }
 
-  public String Identify(FileParse source)
+  public String identify(FileParse source)
   {
-    return Identify(source, true); // preserves original behaviour prior to
+    return identify(source, true); // preserves original behaviour prior to
     // version 2.3
   }
 
@@ -82,11 +82,12 @@ public class IdentifyFile
    * @param closeSource
    * @return filetype string
    */
-  public String Identify(FileParse source, boolean closeSource)
+  public String identify(FileParse source, boolean closeSource)
   {
     String reply = "PFAM";
     String data;
-    int length = 0;
+    int bytesRead = 0;
+    int trimmedLength = 0;
     boolean lineswereskipped = false;
     boolean isBinary = false; // true if length is non-zero and non-printable
     // characters are encountered
@@ -98,7 +99,8 @@ public class IdentifyFile
       }
       while ((data = source.nextLine()) != null)
       {
-        length += data.trim().length();
+        bytesRead += data.length();
+        trimmedLength += data.trim().length();
         if (!lineswereskipped)
         {
           for (int i = 0; !isBinary && i < data.length(); i++)
@@ -134,7 +136,13 @@ public class IdentifyFile
 
         if (data.startsWith("##GFF-VERSION"))
         {
-          reply = GFF3File;
+          // GFF - possibly embedded in a Jalview features file!
+          reply = FeaturesFile;
+          break;
+        }
+        if (looksLikeFeatureData(data))
+        {
+          reply = FeaturesFile;
           break;
         }
         if (data.indexOf("# STOCKHOLM") > -1)
@@ -238,6 +246,7 @@ public class IdentifyFile
         if ((data.indexOf("<") > -1)) // possible Markup Language data i.e HTML,
                                       // RNAML, XML
         {
+          // FIXME this is nuts - it consumes the rest of the file if no match
           boolean identified = false;
           do
           {
@@ -309,23 +318,13 @@ public class IdentifyFile
           reply = PhylipFile.FILE_DESC;
           break;
         }
-
-        /*
-         * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else
-         * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply =
-         * "SimpleBLAST"; break;
-         * 
-         * } // end comments for Jalview 2.4.1
-         */
-        else if (!lineswereskipped && data.charAt(0) != '*'
-                && data.charAt(0) != ' '
-                && data.indexOf(":") < data.indexOf(",")) // &&
-        // data.indexOf(",")<data.indexOf(",",
-        // data.indexOf(",")))
+        else
         {
-          // file looks like a concise JNet file
-          reply = "JnetFile";
-          break;
+          if (!lineswereskipped && looksLikeJnetData(data))
+          {
+            reply = "JnetFile";
+            break;
+          }
         }
 
         lineswereskipped = true; // this means there was some junk before any
@@ -337,14 +336,14 @@ public class IdentifyFile
       }
       else
       {
-        source.reset(); // so the file can be parsed from the beginning again.
+        source.reset(bytesRead); // so the file can be parsed from the mark
       }
     } catch (Exception ex)
     {
       System.err.println("File Identification failed!\n" + ex);
       return source.errormessage;
     }
-    if (length == 0)
+    if (trimmedLength == 0)
     {
       System.err
               .println("File Identification failed! - Empty file was read.");
@@ -353,13 +352,57 @@ public class IdentifyFile
     return reply;
   }
 
+  /**
+   * Returns true if the data appears to be Jnet concise annotation format: the
+   * first character is neither '*' nor a space, and the line contains both a
+   * colon and a comma, with the first colon before the first comma.
+   * 
+   * @param data
+   *          the line to test; must be non-empty, since its first character is
+   *          read unconditionally
+   * @return true if the line satisfies all of the conditions above, else false
+   */
+  protected boolean looksLikeJnetData(String data)
+  {
+    char firstChar = data.charAt(0);
+    int colonPos = data.indexOf(":");
+    int commaPos = data.indexOf(",");
+    boolean isJnet = firstChar != '*' && firstChar != ' ' && colonPos > -1
+            && commaPos > -1 && colonPos < commaPos;
+    // && data.indexOf(",")<data.indexOf(",", data.indexOf(","))) / ??
+    return isJnet;
+  }
+
+  /**
+   * Returns true if the data has at least 6 tab-delimited fields _and_ fields 4
+   * and 5 (start/end) both parse as integers.
+   * 
+   * @param data
+   *          the line to test; may be null
+   * @return false if data is null, has fewer than 6 tab-delimited fields, or
+   *         field 4 or field 5 is not an integer; otherwise true
+   */
+  protected boolean looksLikeFeatureData(String data)
+  {
+    if (data == null)
+    {
+      return false;
+    }
+    String[] columns = data.split("\t");
+    if (columns.length < 6) {
+      return false;
+    }
+    for (int col = 3; col < 5; col++)
+    {
+      try {
+        Integer.parseInt(columns[col]);
+      } catch (NumberFormatException e) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   public static void main(String[] args)
   {
 
     for (int i = 0; args != null && i < args.length; i++)
     {
       IdentifyFile ider = new IdentifyFile();
-      String type = ider.Identify(args[i], AppletFormatAdapter.FILE);
+      String type = ider.identify(args[i], AppletFormatAdapter.FILE);
       System.out.println("Type of " + args[i] + " is " + type);
     }
     if (args == null || args.length == 0)
index a093ebe..2c35547 100644 (file)
@@ -43,7 +43,6 @@ import java.util.Hashtable;
 import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
 import java.util.Vector;
 import java.util.jar.JarInputStream;
 import java.util.jar.JarOutputStream;
@@ -1433,7 +1432,7 @@ public class VamsasAppDatastore
         // to the align frames.
         boolean gathered = false;
         String newviewid = null;
-        Set<AlignedCodonFrame> mappings = av.getAlignment()
+        List<AlignedCodonFrame> mappings = av.getAlignment()
                 .getCodonFrames();
         for (int i = 0; i < views.length; i++)
         {
@@ -2732,7 +2731,7 @@ public class VamsasAppDatastore
 
       }
       // Store any sequence mappings.
-      Set<AlignedCodonFrame> cframes = av.getAlignment().getCodonFrames();
+      List<AlignedCodonFrame> cframes = av.getAlignment().getCodonFrames();
       if (cframes != null)
       {
         for (AlignedCodonFrame acf : cframes)
diff --git a/src/jalview/io/gff/ExonerateHelper.java b/src/jalview/io/gff/ExonerateHelper.java
new file mode 100644 (file)
index 0000000..f7805fd
--- /dev/null
@@ -0,0 +1,348 @@
+package jalview.io.gff;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.MappingType;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A handler to parse GFF in the format generated by the exonerate tool
+ */
+public class ExonerateHelper extends Gff2Helper
+{
+  private static final String SIMILARITY = "similarity";
+
+  private static final String GENOME2GENOME = "genome2genome";
+
+  private static final String CDNA2GENOME = "cdna2genome";
+
+  private static final String CODING2GENOME = "coding2genome";
+
+  private static final String CODING2CODING = "coding2coding";
+
+  private static final String PROTEIN2GENOME = "protein2genome";
+
+  private static final String PROTEIN2DNA = "protein2dna";
+
+  private static final String ALIGN = "Align";
+
+  private static final String QUERY = "Query";
+
+  private static final String TARGET = "Target";
+
+  /**
+   * Processes one line of exonerate GFF. The line is used only to derive
+   * sequence mappings from its attributes; no sequence feature is created for
+   * it.
+   * 
+   * @param seq
+   *          the sequence with which this feature is associated
+   * @param gffColumns
+   *          the column values of the GFF line
+   * @param align
+   *          the alignment we are adding GFF to
+   * @param newseqs
+   *          any new sequences referenced by the GFF
+   * @param relaxedIdMatching
+   *          if true, match word tokens in sequence names
+   * @return always null, as the line is processed to generate a mapping and
+   *         not a sequence feature
+   */
+  @Override
+  public SequenceFeature processGff(SequenceI seq, String[] gffColumns,
+          AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching)
+  {
+    /*
+     * tolerate a line without an attributes column (rather than failing with
+     * ArrayIndexOutOfBoundsException); null text parses to an empty map, which
+     * is then reported as missing Query/Target data
+     */
+    String attr = gffColumns.length > ATTRIBUTES_COL ? gffColumns[ATTRIBUTES_COL]
+            : null;
+    Map<String, List<String>> set = parseNameValuePairs(attr);
+
+    try
+    {
+      processGffSimilarity(set, seq, gffColumns,
+              align, newseqs, relaxedIdMatching);
+    } catch (IOException ivfe)
+    {
+      // data we cannot handle is reported but does not abort parsing
+      System.err.println(ivfe);
+    }
+
+    /*
+     * return null to indicate we don't want to add a sequence feature for
+     * similarity (only process it to create mappings)
+     */
+    return null;
+  }
+
+  /**
+   * Processes the 'Query' (or 'Target') and 'Align' properties associated with
+   * an exonerate GFF similarity feature; these properties define the mapping of
+   * the annotated range to a related sequence. Nothing is mapped (and a message
+   * is written to stderr) if the strand is not '+' or '-', or if there is no
+   * 'Align' data.
+   * 
+   * @param set
+   *          parsed GFF column 9 key/value(s)
+   * @param seq
+   *          the sequence the GFF feature is on
+   * @param gff
+   *          the GFF column data
+   * @param align
+   *          the alignment the sequence belongs to, where any new mappings
+   *          should be added
+   * @param newseqs
+   *          a list of new 'virtual sequences' generated while parsing GFF
+   * @param relaxedIdMatching
+   *          if true allow fuzzy search for a matching target sequence
+   * @throws IOException
+   *           if the exonerate model (GFF source column) is not one we handle,
+   *           or there is not exactly one Query or Target sequence
+   */
+  protected void processGffSimilarity(
+          Map<String, List<String>> set,
+          SequenceI seq, String[] gff, AlignmentI align,
+          List<SequenceI> newseqs, boolean relaxedIdMatching)
+          throws IOException
+  {
+    /*
+     * exonerate may be run with
+     * --showquerygff - outputs 'features on the query' e.g. (protein2genome)  
+     *     Target <dnaseqid> ; Align proteinStartPos dnaStartPos proteinCount  
+     * --showtargetgff - outputs 'features on the target' e.g. (protein2genome)
+     *     Query <proteinseqid> ; Align dnaStartPos proteinStartPos nucleotideCount
+     * where the Align spec may repeat 
+     */
+    // TODO handle coding2coding and similar as well
+    boolean featureIsOnTarget = true;
+    List<String> mapTo = set.get(QUERY);
+    if (mapTo == null)
+    {
+      mapTo = set.get(TARGET);
+      featureIsOnTarget = false;
+    }
+    MappingType type = getMappingType(gff[SOURCE_COL]);
+
+    if (type == null)
+    {
+      throw new IOException("Sorry, I don't handle " + gff[SOURCE_COL]);
+    }
+
+    if (mapTo == null || mapTo.size() != 1)
+    {
+      throw new IOException(
+              "Expecting exactly one sequence in Query or Target field (got "
+                      + mapTo + ")");
+    }
+
+    /*
+     * locate the mapped sequence in the alignment or 'new' (GFF file) sequences; 
+     */
+    SequenceI mappedSequence = findSequence(mapTo.get(0), align, newseqs,
+            relaxedIdMatching);
+
+    /*
+     * If mapping is from protein to dna, we store it as dna to protein instead
+     */
+    SequenceI mapFromSequence = seq;
+    SequenceI mapToSequence = mappedSequence;
+    if ((type == MappingType.NucleotideToPeptide && featureIsOnTarget)
+            || (type == MappingType.PeptideToNucleotide && !featureIsOnTarget))
+    {
+      mapFromSequence = mappedSequence;
+      mapToSequence = seq;
+    }
+
+    /*
+     * Process the Align maps and create mappings.
+     * These may be cdna-genome, cdna-protein, genome-protein.
+     * The mapped sequences may or may not be in the alignment
+     * (they may be included later in the GFF file).
+     */
+
+    /*
+     * get any existing mapping for these sequences (or start one),
+     * and add this mapped range
+     */
+    AlignedCodonFrame acf = getMapping(align, mapFromSequence,
+            mapToSequence);
+
+    /*
+     * exonerate GFF has the strand of the target in column 7
+     * (differs from GFF3 which has it in the Target descriptor)
+     */
+    String strand = gff[STRAND_COL];
+    boolean forwardStrand = true;
+    if ("-".equals(strand))
+    {
+      forwardStrand = false;
+    }
+    else if (!"+".equals(strand))
+    {
+      System.err.println("Strand must be specified for alignment");
+      return;
+    }
+
+    List<String> alignedRegions = set.get(ALIGN);
+    if (alignedRegions == null)
+    {
+      /*
+       * no 'Align' attribute: there is nothing to map, so report and give up
+       * (previously this case failed with a NullPointerException)
+       */
+      System.err.println("'Align' missing in GFF");
+      return;
+    }
+
+    for (String region : alignedRegions)
+    {
+      MapList mapping = buildMapping(region, type, forwardStrand,
+              featureIsOnTarget, gff);
+
+      if (mapping == null)
+      {
+        // malformed Align descriptor (already reported) - skip it
+        continue;
+      }
+
+      acf.addMap(mapFromSequence, mapToSequence, mapping);
+    }
+    align.addCodonFrame(acf);
+  }
+
+  /**
+   * Builds a mapping from one exonerate 'Align' descriptor, which holds three
+   * space-separated integers: a start position on the sequence the feature is
+   * on, a start position on the other sequence, and a count. Returns null (after
+   * writing a message to stderr) if the descriptor does not have exactly three
+   * tokens or they are not all integers.
+   * 
+   * @param region
+   *          the Align descriptor text
+   * @param type
+   *          the type of mapping, which supplies the from/to length ratios
+   * @param forwardStrand
+   *          if false, the 'to' range is descending
+   * @param featureIsOnTarget
+   *          if true, the sense of the Align values is inverted
+   * @param gff
+   *          the GFF column data (not currently used)
+   * @return the mapping, or null if it could not be constructed
+   */
+  protected MapList buildMapping(String region, MappingType type,
+          boolean forwardStrand, boolean featureIsOnTarget, String[] gff)
+  {
+    String[] fields = region.split(" ");
+    if (fields.length != 3)
+    {
+      System.err.println("Malformed Align descriptor: " + region);
+      return null;
+    }
+
+    int[] values = new int[3];
+    try
+    {
+      for (int i = 0; i < values.length; i++)
+      {
+        values[i] = Integer.parseInt(fields[i]);
+      }
+    } catch (NumberFormatException nfe)
+    {
+      System.err.println(nfe.toString());
+      return null;
+    }
+    int alignFromStart = values[0];
+    int alignToStart = values[1];
+    int alignCount = values[2];
+
+    // +1 to count upwards along the 'to' range, -1 to count downwards
+    int direction = forwardStrand ? 1 : -1;
+
+    int fromStart;
+    int fromEnd;
+    int toStart;
+    int toEnd;
+    if (featureIsOnTarget)
+    {
+      /*
+       * feature is on the target sequence, so invert the sense of the
+       * Align values; the count applies to the 'to' range
+       */
+      fromStart = alignToStart;
+      toStart = alignFromStart;
+      toEnd = toStart + direction * (alignCount - 1);
+      int toLength = Math.abs(toEnd - toStart) + 1;
+      int fromLength = toLength * type.getFromRatio() / type.getToRatio();
+      fromEnd = fromStart + fromLength - 1;
+    }
+    else
+    {
+      /*
+       * the 'Align' values are used here, not the feature start/end
+       * (it is not clear why they may differ but it seems they can);
+       * the count applies to the 'from' range
+       */
+      fromStart = alignFromStart;
+      fromEnd = alignFromStart + alignCount - 1;
+      int toLength = alignCount * type.getToRatio() / type.getFromRatio();
+      toStart = alignToStart;
+      toEnd = toStart + direction * (toLength - 1);
+    }
+
+    return constructMappingFromAlign(fromStart, fromEnd, toStart, toEnd,
+            type);
+  }
+
+  /**
+   * Returns a MappingType depending on the exonerate 'model' value (for example
+   * exonerate:protein2genome:local). The model name is matched ignoring case,
+   * in the same way as in {@link #recognises(String[])}.
+   * 
+   * @param model
+   *          the exonerate model, as held in the GFF source column
+   * @return PeptideToNucleotide for protein2dna or protein2genome,
+   *         NucleotideToNucleotide for coding2coding, coding2genome,
+   *         cdna2genome or genome2genome, or null if the model is null or none
+   *         of these
+   */
+  protected static MappingType getMappingType(String model)
+  {
+    if (model == null)
+    {
+      return null;
+    }
+
+    // locale-independent lower case, so that 'I' always maps to 'i'
+    String mdl = model.toLowerCase(java.util.Locale.ROOT);
+
+    if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME))
+    {
+      return MappingType.PeptideToNucleotide;
+    }
+    if (mdl.contains(CODING2CODING) || mdl.contains(CODING2GENOME)
+            || mdl.contains(CDNA2GENOME) || mdl.contains(GENOME2GENOME))
+    {
+      return MappingType.NucleotideToNucleotide;
+    }
+    return null;
+  }
+
+  /**
+   * Tests whether the GFF data looks like it was generated by exonerate, and is
+   * a format we are willing to handle. This requires the type column to be
+   * 'similarity' (ignoring case) and the source column to contain the name of
+   * a supported exonerate alignment model (ignoring case).
+   * 
+   * @param columns
+   *          the GFF column data
+   * @return true if the data is recognised, else false (including if there are
+   *         too few columns to hold a type)
+   */
+  public static boolean recognises(String[] columns)
+  {
+    if (columns == null || columns.length <= TYPE_COL)
+    {
+      // too short to be a GFF feature line
+      return false;
+    }
+    if (!SIMILARITY.equalsIgnoreCase(columns[TYPE_COL]))
+    {
+      return false;
+    }
+
+    /*
+     * inspect alignment model
+     */
+    String model = columns[SOURCE_COL];
+    // e.g. exonerate:protein2genome:local
+    if (model != null)
+    {
+      // locale-independent lower case, so that 'I' always maps to 'i'
+      String mdl = model.toLowerCase(java.util.Locale.ROOT);
+      if (mdl.contains(PROTEIN2DNA) || mdl.contains(PROTEIN2GENOME)
+              || mdl.contains(CODING2CODING)
+              || mdl.contains(CODING2GENOME)
+              || mdl.contains(CDNA2GENOME)
+              || mdl.contains(GENOME2GENOME))
+      {
+        return true;
+      }
+    }
+    System.err.println("Sorry, I don't handle exonerate model " + model);
+    return false;
+  }
+
+  /**
+   * Builds the sequence feature as the superclass does, then assigns it to the
+   * "exonerate" feature group
+   */
+  @Override
+  protected SequenceFeature buildSequenceFeature(String[] gff,
+          Map<String, List<String>> set)
+  {
+    SequenceFeature feature = super.buildSequenceFeature(gff, set);
+    feature.setFeatureGroup("exonerate");
+    return feature;
+  }
+
+}
diff --git a/src/jalview/io/gff/Gff2Helper.java b/src/jalview/io/gff/Gff2Helper.java
new file mode 100644 (file)
index 0000000..31303b1
--- /dev/null
@@ -0,0 +1,51 @@
+package jalview.io.gff;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A handler for GFF2 data, in which a space separates each name from its
+ * value(s) in the column 9 attributes
+ */
+public class Gff2Helper extends GffHelperBase
+{
+  /**
+   * Parses GFF2 column 9 text to a map of name to value(s), treating ';' as the
+   * delimiter between name-value pairs, space as the separator of name and
+   * value, and ',' as the delimiter of multiple values
+   * 
+   * @param text
+   *          the column 9 attributes text
+   * @return the parsed name-values map
+   */
+  public static Map<String, List<String>> parseNameValuePairs(String text)
+  {
+    // TODO: can a value include a comma? if so it will be broken by this
+    final String pairDelimiter = ";";
+    final char nameValueSeparator = ' ';
+    final String valuesDelimiter = ",";
+    return parseNameValuePairs(text, pairDelimiter, nameValueSeparator,
+            valuesDelimiter);
+  }
+
+  /**
+   * Answers the space character, which GFF2 uses between an attribute name and
+   * its value
+   */
+  @Override
+  protected char getNameValueSeparator()
+  {
+    return ' ';
+  }
+
+  /**
+   * Default processing (if not overridden) just constructs a sequence feature
+   * from the GFF columns, parsing the attributes column if there is one
+   */
+  @Override
+  public SequenceFeature processGff(SequenceI seq, String[] gff,
+          AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching) throws IOException
+  {
+    boolean hasAttributes = gff.length > ATTRIBUTES_COL;
+    Map<String, List<String>> attributes = hasAttributes ? parseNameValuePairs(gff[ATTRIBUTES_COL])
+            : null;
+    return buildSequenceFeature(gff, attributes);
+  }
+
+}
diff --git a/src/jalview/io/gff/Gff3Helper.java b/src/jalview/io/gff/Gff3Helper.java
new file mode 100644 (file)
index 0000000..2e98e4e
--- /dev/null
@@ -0,0 +1,397 @@
+package jalview.io.gff;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.MappingType;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblSeqProxy;
+import jalview.util.MapList;
+import jalview.util.StringUtils;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Base class with generic / common functionality for processing GFF3 data.
+ * Override this as required for any specialisations resulting from
+ * peculiarities of GFF3 generated by particular tools.
+ */
+public class Gff3Helper extends GffHelperBase
+{
+  protected static final String TARGET = "Target";
+
+  protected static final String ID = "ID";
+
+  private static final String NAME = "Name";
+
+  /**
+   * Parses GFF3 column 9 text to a map of name to value(s). GFF3 uses ';'
+   * between name-value pairs, '=' between a name and its value(s), and comma to
+   * separate multiple values for a name.
+   * 
+   * @param text
+   *          the column 9 attributes text
+   * @return the parsed name-values map
+   */
+  public static Map<String, List<String>> parseNameValuePairs(String text)
+  {
+    final String pairDelimiter = ";";
+    final char nameValueSeparator = '=';
+    final String valuesDelimiter = ",";
+    return parseNameValuePairs(text, pairDelimiter, nameValueSeparator,
+            valuesDelimiter);
+  }
+
+  /**
+   * Processes one GFF feature line. A line with exactly 9 columns is handled
+   * according to its type: as a protein match, as a nucleotide match, or
+   * otherwise as a plain sequence feature. A line with any other number of
+   * columns is made into a sequence feature with no attributes.
+   * 
+   * @param seq
+   *          the sequence with which this feature is associated
+   * @param gff
+   *          the column values of the GFF line
+   * @param align
+   *          the alignment we are adding GFF to
+   * @param newseqs
+   *          any new sequences referenced by the GFF
+   * @param relaxedIdMatching
+   *          if true, match word tokens in sequence names
+   * @return the sequence feature to add to the sequence, or null if none should
+   *         be added
+   * @throws IOException
+   */
+  @Override
+  public SequenceFeature processGff(SequenceI seq, String[] gff,
+          AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching) throws IOException
+  {
+    if (gff.length != 9)
+    {
+      /*
+       * fall back on generating a sequence feature with no special processing
+       */
+      return buildSequenceFeature(gff, null);
+    }
+
+    String soTerm = gff[TYPE_COL];
+    Map<String, List<String>> attributes = parseNameValuePairs(gff[ATTRIBUTES_COL]);
+
+    if (SequenceOntology.getInstance().isProteinMatch(soTerm))
+    {
+      return processProteinMatch(attributes, seq, gff, align, newseqs,
+              relaxedIdMatching);
+    }
+    if (SequenceOntology.getInstance().isNucleotideMatch(soTerm))
+    {
+      return processNucleotideMatch(attributes, seq, gff, align, newseqs,
+              relaxedIdMatching);
+    }
+    return buildSequenceFeature(gff, attributes);
+  }
+
+  /**
+   * Processes one GFF3 nucleotide (e.g. cDNA to genome) match, adding to the
+   * alignment a mapping from the feature's range to each valid 'Target' range.
+   * 
+   * @param attributes
+   *          parsed GFF column 9 key/value(s)
+   * @param seq
+   *          the sequence the GFF feature is on
+   * @param gffColumns
+   *          the GFF column data
+   * @param align
+   *          the alignment the sequence belongs to, where any new mappings
+   *          should be added
+   * @param newseqs
+   *          a list of new 'virtual sequences' generated while parsing GFF
+   * @param relaxedIdMatching
+   *          if true allow fuzzy search for a matching target sequence
+   * @return a sequence feature built from the GFF data, or null if the feature
+   *         is on the reverse strand or has no Target
+   * @throws IOException
+   */
+  protected SequenceFeature processNucleotideMatch(
+          Map<String, List<String>> attributes, SequenceI seq,
+          String[] gffColumns, AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching)
+          throws IOException
+  {
+    /*
+     * (For now) we don't process mappings from reverse complement ; to do
+     * this would require (a) creating a virtual sequence placeholder for
+     * the reverse complement (b) resolving the sequence by its id from some
+     * source (GFF ##FASTA or other) (c) creating the reverse complement
+     * sequence (d) updating the mapping to be to the reverse complement
+     */
+    if ("-".equals(gffColumns[STRAND_COL]))
+    {
+      System.err
+              .println("Skipping mapping from reverse complement as not yet supported");
+      return null;
+    }
+
+    List<String> targets = attributes.get(TARGET);
+    if (targets == null)
+    {
+      System.err.println("'Target' missing in GFF");
+      return null;
+    }
+
+    /*
+     * Typically we only expect one Target per GFF line, but this can handle
+     * multiple matches, to the same or different sequences (e.g. dna variants)
+     */
+    for (String target : targets)
+    {
+      /*
+       * each Target is "seqid start end [strand]"
+       */
+      String[] fields = target.split(" ");
+      if (fields.length < 3)
+      {
+        System.err.println("Incomplete Target: " + target);
+        continue;
+      }
+
+      /*
+       * Locate the mapped sequence in the alignment, or as a 
+       * (new or existing) virtual sequence in the newseqs list 
+       */
+      SequenceI mappedSequence = findSequence(
+              findTargetId(fields[0], attributes), align, newseqs,
+              relaxedIdMatching);
+      if (mappedSequence == null)
+      {
+        continue;
+      }
+
+      /*
+       * get any existing mapping for these sequences (or start one),
+       * and add this mapped range
+       */
+      AlignedCodonFrame acf = getMapping(align, seq, mappedSequence);
+
+      try
+      {
+        int targetStart = Integer.parseInt(fields[1]);
+        int targetEnd = Integer.parseInt(fields[2]);
+
+        // a mapping to the reverse strand has its start and end swapped
+        boolean toReverseStrand = fields.length > 3
+                && "-".equals(fields[3]);
+        int toStart = toReverseStrand ? targetEnd : targetStart;
+        int toEnd = toReverseStrand ? targetStart : targetEnd;
+
+        int fromStart = Integer.parseInt(gffColumns[START_COL]);
+        int fromEnd = Integer.parseInt(gffColumns[END_COL]);
+        MapList mapping = constructMappingFromAlign(fromStart, fromEnd,
+                toStart, toEnd, MappingType.NucleotideToNucleotide);
+
+        if (mapping != null)
+        {
+          acf.addMap(seq, mappedSequence, mapping);
+          align.addCodonFrame(acf);
+        }
+      } catch (NumberFormatException nfe)
+      {
+        System.err.println("Invalid start or end in Target " + target);
+      }
+    }
+
+    return buildSequenceFeature(gffColumns, attributes);
+  }
+
+  /**
+   * Returns the target sequence id extracted from the GFF name/value pairs.
+   * This default implementation returns the supplied target text unchanged and
+   * does not inspect the attribute map. This may be overridden where tools
+   * report the target id in a non-standard way.
+   * 
+   * @param target
+   *          a "Target" value from GFF column 9 (typically "seqid start end"),
+   *          or just its first token
+   * @param set
+   *          a map with all parsed column 9 attributes (not used here)
+   * @return the target argument, unchanged
+   */
+  @SuppressWarnings("unused")
+  protected String findTargetId(String target, Map<String, List<String>> set)
+  {
+    return target;
+  }
+
+  /**
+   * Processes one GFF 'protein_match'; fields of interest are
+   * <ul>
+   * <li>feature group - the database reporting a match e.g. Pfam</li>
+   * <li>Name - the matched entry's accession id in the database</li>
+   * <li>ID - a sequence identifier for the matched region (which may be
+   * appended as FASTA in the GFF file)</li>
+   * </ul>
+   * For each 'Target', the mapped sequence is given a copy of the feature
+   * (positioned from 1 to the feature's length), and a 1:1 mapping from the
+   * feature's range on seq to that range is added to the alignment.
+   * 
+   * @param set
+   *          parsed GFF column 9 key/value(s)
+   * @param seq
+   *          the sequence the GFF feature is on
+   * @param gffColumns
+   *          the GFF column data
+   * @param align
+   *          the alignment the sequence belongs to, where any new mappings
+   *          should be added
+   * @param newseqs
+   *          a list of new 'virtual sequences' generated while parsing GFF
+   * @param relaxedIdMatching
+   *          if true allow fuzzy search for a matching target sequence
+   * @return the sequence feature built from the GFF data
+   */
+  protected SequenceFeature processProteinMatch(
+          Map<String, List<String>> set, SequenceI seq,
+          String[] gffColumns, AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching)
+  {
+    // This is currently tailored to InterProScan GFF output:
+    // ID holds the ID of the matched sequence, Target references the
+    // query sequence; this looks wrong, as ID should just be the GFF internal
+    // ID of the GFF feature, while Target would normally reference the matched
+    // sequence.
+    // TODO refactor as needed if other protein-protein GFF varies
+
+    SequenceFeature sf = buildSequenceFeature(gffColumns, set);
+
+    List<String> targets = set.get(TARGET);
+    if (targets == null)
+    {
+      return sf;
+    }
+
+    /*
+     * values which are the same for every target; a missing 'Name' is 
+     * treated as an empty accession id (so no rename is requested)
+     */
+    List<String> names = set.get(NAME);
+    String accessionId = names == null ? "" : StringUtils
+            .listToDelimitedString(names, ",");
+    int sequenceFeatureLength = 1 + sf.getEnd() - sf.getBegin();
+
+    for (String target : targets)
+    {
+      /*
+       * locate the mapped sequence in the alignment, or as a 
+       * (new or existing) virtual sequence in the newseqs list 
+       */
+      SequenceI mappedSequence = findSequence(findTargetId(target, set),
+              align, newseqs, relaxedIdMatching);
+      if (mappedSequence == null)
+      {
+        continue;
+      }
+
+      /*
+       * give the mapped sequence a copy of the sequence feature, with 
+       * start/end range adjusted 
+       */
+      SequenceFeature sf2 = new SequenceFeature(sf);
+      sf2.setBegin(1);
+      sf2.setEnd(sequenceFeatureLength);
+      mappedSequence.addSequenceFeature(sf2);
+
+      /*
+       * add a property to the copied feature so that the mapped sequence can
+       * eventually be renamed with its qualified accession id; renaming has
+       * to wait until all sequence reference resolution is complete
+       */
+      if (accessionId.length() > 0)
+      {
+        String database = sf.getType(); // TODO InterProScan only??
+        String qualifiedAccId = database + "|" + accessionId;
+        sf2.setValue(RENAME_TOKEN, qualifiedAccId);
+      }
+
+      /*
+       * get any existing mapping for these sequences (or start one),
+       * and add this mapped range
+       */
+      AlignedCodonFrame alco = getMapping(align, seq, mappedSequence);
+      int[] from = new int[] { sf.getBegin(), sf.getEnd() };
+      int[] to = new int[] { 1, sequenceFeatureLength };
+      MapList mapping = new MapList(from, to, 1, 1);
+
+      alco.addMap(seq, mappedSequence, mapping);
+      align.addCodonFrame(alco);
+    }
+
+    return sf;
+  }
+
+  /**
+   * Return '=' as the name-value separator used in column 9 attributes.
+   * 
+   * @return the '=' character
+   */
+  @Override
+  protected char getNameValueSeparator()
+  {
+    return '=';
+  }
+
+  /**
+   * Builds the sequence feature as the superclass does, then replaces its
+   * description with the one chosen by getDescription, if that is not null
+   */
+  @Override
+  protected SequenceFeature buildSequenceFeature(String[] gff,
+          Map<String, List<String>> attributes)
+  {
+    SequenceFeature feature = super.buildSequenceFeature(gff, attributes);
+    String description = getDescription(feature, attributes);
+    if (description == null)
+    {
+      // nothing better found, so keep the default description
+      return feature;
+    }
+    feature.setDescription(description);
+    return feature;
+  }
+
+  /**
+   * Apply heuristic rules to try to get the most useful feature description.
+   * In increasing order of precedence these are
+   * <ul>
+   * <li>the first token of the feature's 'Target' value, if it has one</li>
+   * <li>the 'alleles' attribute, for a sequence variant</li>
+   * <li>the 'Name' attribute, for a transcript or an exon</li>
+   * </ul>
+   * 
+   * @param sf
+   *          the sequence feature
+   * @param attributes
+   *          parsed GFF column 9 key/value(s); may be null (as for a GFF line
+   *          without 9 columns), in which case only the Target rule applies
+   * @return the description, or null if no rule applies
+   */
+  protected String getDescription(SequenceFeature sf,
+          Map<String, List<String>> attributes)
+  {
+    String desc = null;
+    String target = (String) sf.getValue(TARGET);
+    if (target != null)
+    {
+      desc = target.split(" ")[0];
+    }
+
+    if (attributes == null)
+    {
+      /*
+       * no attributes to inspect (avoids a NullPointerException below)
+       */
+      return desc;
+    }
+
+    SequenceOntology so = SequenceOntology.getInstance();
+    String type = sf.getType();
+    if (so.isSequenceVariant(type))
+    {
+      /*
+       * Ensembl returns dna variants as 'alleles'
+       */
+      desc = StringUtils.listToDelimitedString(
+              attributes.get("alleles"), ",");
+    }
+
+    /*
+     * extract 'Name' for a transcript (to show gene name)
+     * or an exon (so 'colour by label' shows exon boundaries) 
+     */
+    if (EnsemblSeqProxy.isTranscript(type)
+            || so.isA(type, SequenceOntology.EXON))
+    {
+      desc = StringUtils.listToDelimitedString(attributes.get(NAME), ",");
+    }
+    return desc;
+  }
+}
diff --git a/src/jalview/io/gff/GffHelperBase.java b/src/jalview/io/gff/GffHelperBase.java
new file mode 100644 (file)
index 0000000..feeec1d
--- /dev/null
@@ -0,0 +1,405 @@
+package jalview.io.gff;
+
+import jalview.analysis.SequenceIdMatcher;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.MappingType;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.MapList;
+import jalview.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Base class with common functionality for flavours of GFF handler (GFF2 or
+ * GFF3)
+ */
+public abstract class GffHelperBase implements GffHelperI
+{
+  private static final String NOTE = "Note";
+
+  /*
+   * GFF columns 1-9 (zero-indexed):
+   */
+  protected static final int SEQID_COL = 0;
+
+  protected static final int SOURCE_COL = 1;
+
+  protected static final int TYPE_COL = 2;
+
+  protected static final int START_COL = 3;
+
+  protected static final int END_COL = 4;
+
+  protected static final int SCORE_COL = 5;
+
+  protected static final int STRAND_COL = 6;
+
+  protected static final int PHASE_COL = 7;
+
+  protected static final int ATTRIBUTES_COL = 8;
+
+  private AlignmentI lastmatchedAl = null;
+
+  private SequenceIdMatcher matcher = null;
+
+  /**
+   * Constructs and returns a mapping between the given ranges, or null if the
+   * range lengths cannot be reconciled with the mapping type's ratios. A
+   * protein to nucleotide mapping is inverted (ranges swapped and the inverse
+   * mapping type used), since Jalview always models from dna to protein.
+   * 
+   * @param fromStart
+   * @param fromEnd
+   * @param toStart
+   * @param toEnd
+   * @param mappingType
+   *          type of mapping (e.g. protein to nucleotide)
+   * @return the mapping, or null if data appear invalid
+   */
+  protected MapList constructMappingFromAlign(int fromStart, int fromEnd,
+          int toStart, int toEnd, MappingType mappingType)
+  {
+    int[] gffFrom = new int[] { fromStart, fromEnd };
+    int[] gffTo = new int[] { toStart, toEnd };
+
+    /*
+     * Jalview always models from dna to protein, so switch values if the
+     * GFF mapping is from protein to dna
+     */
+    boolean invert = mappingType == MappingType.PeptideToNucleotide;
+    int[] from = invert ? gffTo : gffFrom;
+    int[] to = invert ? gffFrom : gffTo;
+    MappingType type = invert ? mappingType.getInverse() : mappingType;
+
+    int fromRatio = type.getFromRatio();
+    int toRatio = type.getToRatio();
+
+    /*
+     * sanity check that mapped residue counts match
+     * TODO understand why PASA generates such cases...
+     */
+    if (trimMapping(from, to, fromRatio, toRatio))
+    {
+      /*
+       * If a codon has an intron gap, there will be contiguous 'toRanges';
+       * this is handled for us by the MapList constructor. 
+       * (It is not clear that exonerate ever generates this case)  
+       */
+      return new MapList(from, to, fromRatio, toRatio);
+    }
+
+    System.err.println("Ignoring mapping from " + Arrays.toString(from)
+            + " to " + Arrays.toString(to) + " as counts don't match!");
+    return null;
+  }
+
+  /**
+   * Checks that the 'from' and 'to' ranges have equivalent lengths, that is,
+   * lengths in the proportion fromRatio:toRatio. If not, tries to shorten the
+   * longer range at its end position so that they do, writing a message to
+   * stderr. Note the range array values may be modified by this method.
+   * 
+   * @param from
+   *          [start, end] of the 'from' range (may be descending)
+   * @param to
+   *          [start, end] of the 'to' range (may be descending)
+   * @param fromRatio
+   * @param toRatio
+   * @return true if the ranges are (or have been made) equivalent, false if no
+   *         whole number of positions can be trimmed to make them so
+   */
+  protected static boolean trimMapping(int[] from, int[] to, int fromRatio,
+          int toRatio)
+  {
+    int fromLength = Math.abs(from[1] - from[0]) + 1;
+    int toLength = Math.abs(to[1] - to[0]) + 1;
+
+    /*
+     * positive if the from range is too long, negative if the to range is
+     */
+    int excess = fromLength * toRatio - toLength * fromRatio;
+    if (excess == 0)
+    {
+      return true;
+    }
+
+    boolean trimFrom = excess > 0;
+    int divisor = trimFrom ? toRatio : fromRatio;
+    if (excess % divisor != 0)
+    {
+      /*
+       * Couldn't truncate to an exact match..
+       */
+      return false;
+    }
+
+    /*
+     * restrict the longer range to make them match up
+     * it's kind of arbitrary which end we truncate - here it is the end
+     */
+    int[] range = trimFrom ? from : to;
+    int trimBy = Math.abs(excess) / divisor;
+    System.err.print("Truncating mapping " + (trimFrom ? "from " : "to ")
+            + Arrays.toString(range) + " to ");
+    if (range[1] > range[0])
+    {
+      range[1] -= trimBy;
+    }
+    else
+    {
+      range[1] += trimBy;
+    }
+    System.err.println(Arrays.toString(range));
+    return true;
+  }
+
+  /**
+   * Returns a sequence matching the given id, as follows
+   * <ul>
+   * <li>strict matching looks up the id as a sequence name in the alignment,
+   * then for an exactly equal name in the new sequences</li>
+   * <li>relaxed matching uses a SequenceIdMatcher, built from the alignment
+   * sequences and any new sequences when first used for an alignment</li>
+   * <li>if there is still no match, and a new sequences list was supplied, a
+   * new placeholder sequence is created, added to that list, and returned</li>
+   * </ul>
+   * 
+   * @param seqId
+   * @param align
+   * @param newseqs
+   * @param relaxedIdMatching
+   * 
+   * @return the matched or placeholder sequence, or null if seqId is null, or
+   *         there is no match and no new sequences list
+   */
+  protected SequenceI findSequence(String seqId, AlignmentI align,
+          List<SequenceI> newseqs, boolean relaxedIdMatching)
+  {
+    if (seqId == null)
+    {
+      return null;
+    }
+
+    SequenceI found;
+    if (relaxedIdMatching)
+    {
+      if (align != lastmatchedAl)
+      {
+        // (re)build the cached matcher for this alignment
+        lastmatchedAl = align;
+        matcher = new SequenceIdMatcher(align.getSequencesArray());
+        if (newseqs != null)
+        {
+          matcher.addAll(newseqs);
+        }
+      }
+      found = matcher.findIdMatch(seqId);
+    }
+    else
+    {
+      found = align.findName(seqId, true);
+    }
+
+    if (found != null || newseqs == null)
+    {
+      return found;
+    }
+
+    if (!relaxedIdMatching)
+    {
+      for (SequenceI candidate : newseqs)
+      {
+        if (seqId.equals(candidate.getName()))
+        {
+          return candidate;
+        }
+      }
+    }
+
+    /*
+     * no match anywhere: create a dummy sequence and add it to the
+     * newseqs list (and to the matcher if we are using one)
+     */
+    SequenceI placeholder = new SequenceDummy(seqId);
+    if (relaxedIdMatching)
+    {
+      matcher.addAll(Arrays.asList(new SequenceI[] { placeholder }));
+    }
+    newseqs.add(placeholder);
+    return placeholder;
+  }
+
+  /**
+   * Parses the input line to a map of name / value(s) pairs. For example the
+   * line <br>
+   * Notes=Fe-S;Method=manual curation, prediction; source = Pfam; Notes = Metal <br>
+   * if parsed with namesDelimiter=";", nameValueSeparator='=' and
+   * valuesDelimiter="," <br>
+   * would return a map with { Notes={Fe-S, Metal}, Method={manual curation,
+   * " prediction"}, source={Pfam}} (individual list values are not trimmed) <br>
+   * 
+   * This method supports parsing of either GFF2 format (which uses space ' ' as
+   * the name/value delimiter, and allows multiple occurrences of the same
+   * name), or GFF3 format (which uses '=' as the name/value delimiter, and
+   * strictly does not allow repeat occurrences of the same name - but does
+   * allow a comma-separated list of values).
+   * 
+   * @param text
+   * @param namesDelimiter
+   *          the major delimiter between name-value pairs
+   * @param nameValueSeparator
+   *          the single character that separates a name from its value(s)
+   * @param valuesDelimiter
+   *          delimits a list of more than one value
+   * @return the name-values map (which may be empty but never null)
+   */
+  public static Map<String, List<String>> parseNameValuePairs(String text,
+          String namesDelimiter, char nameValueSeparator,
+          String valuesDelimiter)
+  {
+    Map<String, List<String>> map = new HashMap<String, List<String>>();
+    if (text == null || text.trim().length() == 0)
+    {
+      return map;
+    }
+
+    for (String pair : text.trim().split(namesDelimiter))
+    {
+      pair = pair.trim();
+      if (pair.length() == 0)
+      {
+        continue;
+      }
+
+      int sepPos = pair.indexOf(nameValueSeparator);
+      if (sepPos == -1)
+      {
+        // no name=value present
+        continue;
+      }
+
+      String key = pair.substring(0, sepPos).trim();
+      String values = pair.substring(sepPos + 1).trim();
+      if (values.length() > 0)
+      {
+        List<String> vals = map.get(key);
+        if (vals == null)
+        {
+          vals = new ArrayList<String>();
+          map.put(key, vals);
+        }
+        for (String val : values.split(valuesDelimiter))
+        {
+          vals.add(val);
+        }
+      }
+    }
+    return map;
+  }
+
+  /**
+   * Constructs a SequenceFeature from the GFF column data. Subclasses may wish
+   * to call this method then adjust the SequenceFeature depending on the
+   * particular usage of different tools that generate GFF.
+   * 
+   * @param gff
+   * @param attributes
+   * @return
+   */
+  protected SequenceFeature buildSequenceFeature(String[] gff,
+          Map<String, List<String>> attributes)
+  {
+    // NOTE(review): column indexes are not bounds-checked in this method; a
+    // gff array shorter than the highest column read here would throw
+    // ArrayIndexOutOfBoundsException, which is not caught below
+    try
+    {
+      // a non-numeric start or end abandons the feature (see outer catch)
+      int start = Integer.parseInt(gff[START_COL]);
+      int end = Integer.parseInt(gff[END_COL]);
+
+      /*
+       * default 'score' is 0 rather than Float.NaN as the latter currently
+       * disables the 'graduated colour => colour by label' option
+       */
+      float score = 0f;
+      try
+      {
+        score = Float.parseFloat(gff[SCORE_COL]);
+      } catch (NumberFormatException nfe)
+      {
+        // e.g. '.' - leave as zero
+      }
+
+      // NOTE(review): SOURCE_COL is supplied as both the 2nd and the last
+      // constructor argument - presumably description and feature group;
+      // confirm against the SequenceFeature constructor
+      SequenceFeature sf = new SequenceFeature(gff[TYPE_COL],
+              gff[SOURCE_COL], start, end, score, gff[SOURCE_COL]);
+
+      sf.setStrand(gff[STRAND_COL]);
+
+      sf.setPhase(gff[PHASE_COL]);
+
+      // a null attributes map means column 9 is not recorded at all
+      if (attributes != null)
+      {
+        /*
+         * save 'raw' column 9 to allow roundtrip output as input
+         */
+        sf.setAttributes(gff[ATTRIBUTES_COL]);
+
+        /*
+         * Add attributes in column 9 to the sequence feature's 
+         * 'otherData' table; use Note as a best proxy for description
+         */
+        for (Entry<String, List<String>> attr : attributes.entrySet())
+        {
+          // multiple values for one name are stored as a comma-joined string
+          String values = StringUtils.listToDelimitedString(
+                  attr.getValue(), ",");
+          sf.setValue(attr.getKey(), values);
+          if (NOTE.equals(attr.getKey()))
+          {
+            sf.setDescription(values);
+          }
+        }
+      }
+
+      return sf;
+    } catch (NumberFormatException nfe)
+    {
+      // reached for a non-numeric start/end, and also for any
+      // NumberFormatException thrown by the calls made inside the try block;
+      // callers must be prepared for a null return
+      System.err.println("Invalid number in gff: " + nfe.getMessage());
+      return null;
+    }
+  }
+
+  /**
+   * Returns the character used to separate attribute names from values in GFF
+   * column 9. This is space for GFF2, '=' for GFF3.
+   * 
+   * @return
+   */
+  protected abstract char getNameValueSeparator();
+
+  /**
+   * Returns any existing mapping held on the alignment between the given
+   * dataset sequences, or a new one if none found. This is a convenience method
+   * to facilitate processing multiple GFF lines that make up a single 'spliced'
+   * mapping, by extending the first mapping as the others are read.
+   * 
+   * @param align
+   * @param fromSeq
+   * @param toSeq
+   * @return
+   */
+  protected AlignedCodonFrame getMapping(AlignmentI align,
+          SequenceI fromSeq, SequenceI toSeq)
+  {
+    // reuse the mapping the alignment already holds for this sequence pair,
+    // otherwise hand back a fresh, empty one
+    AlignedCodonFrame existing = align.getMapping(fromSeq, toSeq);
+    return existing != null ? existing : new AlignedCodonFrame();
+  }
+
+}
diff --git a/src/jalview/io/gff/GffHelperFactory.java b/src/jalview/io/gff/GffHelperFactory.java
new file mode 100644 (file)
index 0000000..8bd5115
--- /dev/null
@@ -0,0 +1,70 @@
+package jalview.io.gff;
+
+
+/**
+ * A factory to serve instances of GFF helper classes
+ */
+public class GffHelperFactory
+{
+
+  /**
+   * Returns a class to process the GFF line based on inspecting its column
+   * data. This may return a general-purpose GFF2 or GFF3 helper, or a
+   * specialisation for a flavour of GFF generated by a particular tool.
+   * 
+   * @param gff
+   * @return
+   */
+  public static GffHelperI getHelper(String[] gff)
+  {
+    if (gff == null || gff.length < 6)
+    {
+      return null;
+    }
+
+    GffHelperI result = null;
+    if (ExonerateHelper.recognises(gff))
+    {
+      result = new ExonerateHelper();
+    }
+    else if (InterProScanHelper.recognises(gff))
+    {
+      result = new InterProScanHelper();
+    }
+    else if (looksLikeGff3(gff))
+    {
+      result = new Gff3Helper();
+    }
+    else
+    {
+      result = new Gff2Helper();
+    }
+
+    return result;
+  }
+
+  /**
+   * Heuristic rule: if column 9 seems to have Name=Value entries, assume this
+   * is GFF3. GFF3 uses '=' as name-value separator, GFF2 uses space ' '.
+   * 
+   * @param gff
+   * @return
+   */
+  protected static boolean looksLikeGff3(String[] gff)
+  {
+    if (gff.length >= 9)
+    {
+      String attributes = gff[8].trim();
+      int pos1 = attributes.indexOf(';');
+      int pos2 = attributes.indexOf('=');
+      if (pos2 != -1 && (pos1 == -1 || pos2 < pos1))
+      {
+        // there is an '=' before the first ';' (if any)
+        // not foolproof as theoretically GFF2 could be like "Name Value=123;"
+        return true;
+      }
+    }
+    return false;
+  }
+
+}
diff --git a/src/jalview/io/gff/GffHelperI.java b/src/jalview/io/gff/GffHelperI.java
new file mode 100644 (file)
index 0000000..3d9dc6f
--- /dev/null
@@ -0,0 +1,44 @@
+package jalview.io.gff;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * An interface to describe common functionality of different flavours of GFF
+ * 
+ * @author gmcarstairs
+ *
+ */
+public interface GffHelperI
+{
+
+  final String RENAME_TOKEN = "$RENAME_TO$";
+
+  /**
+   * Process one GFF feature line
+   * 
+   * @param seq
+   *          the sequence with which this feature is associated
+   * @param gffColumns
+   *          the GFF column data
+   * @param align
+   *          the alignment we are adding GFF to
+   * @param newseqs
+   *          any new sequences referenced by the GFF
+   * @param relaxedIdMatching
+   *          if true, match word tokens in sequence names
+   * @return a SequenceFeature if one should be created, else null
+   * @throws IOException
+   */
+  SequenceFeature processGff(SequenceI seq, String[] gffColumns,
+          AlignmentI align,
+          List<SequenceI> newseqs, boolean relaxedIdMatching)
+          throws IOException;
+
+  // java 8 will allow static methods in interfaces:
+  // static boolean recognises(String [] columns);
+}
diff --git a/src/jalview/io/gff/InterProScanHelper.java b/src/jalview/io/gff/InterProScanHelper.java
new file mode 100644 (file)
index 0000000..3323e27
--- /dev/null
@@ -0,0 +1,117 @@
+package jalview.io.gff;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.StringUtils;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A handler to parse GFF in the format generated by InterProScan
+ */
+public class InterProScanHelper extends Gff3Helper
+{
+  private static final String INTER_PRO_SCAN = "InterProScan";
+
+  private static final String SIGNATURE_DESC = "signature_desc";
+
+  /**
+   * Process one GFF feature line (as modelled by SequenceFeature)
+   * 
+   * @param seq
+   *          the sequence with which this feature is associated
+   * @param gff
+   *          the gff column data
+   * @param align
+   *          the alignment we are adding GFF to
+   * @param newseqs
+   *          any new sequences referenced by the GFF
+   * @param relaxedIdMatching
+   *          if true, match word tokens in sequence names
+   * @return a sequence feature if one should be added to the sequence, else
+   *         null (i.e. it has been processed in another way e.g. to generate a
+   *         mapping)
+   * @throws IOException
+   */
+  @Override
+  public SequenceFeature processGff(SequenceI seq, String[] gff,
+          AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching) throws IOException
+  {
+    /*
+     * ignore the 'polypeptide' match of the whole sequence
+     */
+    if (".".equals(gff[SOURCE_COL]))
+    {
+      return null;
+    }
+
+    return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
+  }
+
+  /**
+ * 
+ */
+  @Override
+  protected SequenceFeature buildSequenceFeature(String[] gff,
+          Map<String, List<String>> attributes)
+  {
+    SequenceFeature sf = super.buildSequenceFeature(gff, attributes);
+
+    /*
+     * signature_desc is a more informative source of description
+     */
+    List<String> desc = attributes.get(SIGNATURE_DESC);
+    String description = StringUtils.listToDelimitedString(desc, ", ");
+    if (description.length() > 0)
+    {
+      sf.setDescription(description);
+    }
+
+    /*
+     * Set sequence feature group as 'InterProScan', and type as the source
+     * database for this match (e.g. 'Pfam')
+     */
+    sf.setType(gff[SOURCE_COL]);
+    sf.setFeatureGroup(INTER_PRO_SCAN);
+
+    return sf;
+  }
+
+  /**
+   * Tests whether the GFF data looks like it was generated by InterProScan
+   * 
+   * @param columns
+   * @return
+   */
+  public static boolean recognises(String[] columns)
+  {
+    SequenceOntology so = SequenceOntology.getInstance();
+    String type = columns[TYPE_COL];
+    if (so.isProteinMatch(type)
+            || (".".equals(columns[SOURCE_COL]) && so.isPolypeptide(type)))
+    {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Overriden method, because InterProScan GFF has the target sequence id in
+   * GFF field 'ID' rather than the usual 'Target' :-O
+   */
+  @Override
+  protected String findTargetId(String target, Map<String, List<String>> set)
+  {
+    List<String> ids = set.get(ID);
+    if (ids == null || ids.size() != 1)
+    {
+      return null;
+    }
+    return ids.get(0);
+  }
+
+}
diff --git a/src/jalview/io/gff/SequenceOntology.java b/src/jalview/io/gff/SequenceOntology.java
new file mode 100644 (file)
index 0000000..685b83e
--- /dev/null
@@ -0,0 +1,407 @@
+package jalview.io.gff;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.biojava.nbio.ontology.Ontology;
+import org.biojava.nbio.ontology.Term;
+import org.biojava.nbio.ontology.Term.Impl;
+import org.biojava.nbio.ontology.Triple;
+import org.biojava.nbio.ontology.io.OboParser;
+import org.biojava.nbio.ontology.utils.Annotation;
+
+/**
+ * A wrapper class that parses the Sequence Ontology and exposes useful access
+ * methods. This version uses the BioJava parser.
+ */
+public class SequenceOntology
+{
+
+  /*
+   * selected commonly used values for quick reference
+   */
+  // SO:0000316
+  public static final String CDS = "CDS";
+
+  // SO:0001060
+  public static final String SEQUENCE_VARIANT = "sequence_variant";
+
+  // SO:0000147
+  public static final String EXON = "exon";
+
+  // SO:0000673
+  public static final String TRANSCRIPT = "transcript";
+
+  // SO:0000704
+  public static final String GENE = "gene";
+
+  /*
+   * singleton instance of this class
+   */
+  private static SequenceOntology instance;
+
+  /*
+   * the parsed Ontology data as modelled by BioJava
+   */
+  private Ontology ontology;
+
+  /*
+   * the ontology term for the isA relationship
+   */
+  private Term isA;
+
+  /*
+   * lookup of terms by user readable name (NB not guaranteed unique)
+   */
+  private Map<String, Term> termsByDescription;
+
+  /*
+   * Map where key is a Term and value is a (possibly empty) list of 
+   * all Terms to which the key has an 'isA' relationship, either
+   * directly or indirectly (A isA B isA C)
+   */
+  private Map<Term, List<Term>> termIsA;
+
+  /**
+   * Returns singleton instance
+   * 
+   * @return
+   */
+  public synchronized static SequenceOntology getInstance()
+  {
+    if (instance == null)
+    {
+      instance = new SequenceOntology();
+    }
+    return instance;
+  }
+
+  /**
+   * Private constructor to enforce use of singleton. Parses and caches the SO
+   * OBO data file.
+   */
+  private SequenceOntology()
+  {
+    termsByDescription = new HashMap<String, Term>();
+    termIsA = new HashMap<Term, List<Term>>();
+
+    loadOntologyZipFile("so-xp-simple.obo");
+  }
+
+  /**
+   * Loads the given ontology file from a zip file with ".zip" appended
+   * 
+   * @param ontologyFile
+   */
+  protected void loadOntologyZipFile(String ontologyFile)
+  {
+    ZipInputStream zipStream = null;
+    try
+    {
+      String zipFile = ontologyFile + ".zip";
+      System.out.println("Loading Sequence Ontology from " + zipFile);
+      InputStream inStream = this.getClass().getResourceAsStream(
+              "/" + zipFile);
+      zipStream = new ZipInputStream(new BufferedInputStream(inStream));
+      ZipEntry entry;
+      while ((entry = zipStream.getNextEntry()) != null)
+      {
+        if (entry.getName().equals(ontologyFile))
+        {
+          loadOboFile(zipStream);
+        }
+      }
+    } catch (Exception e)
+    {
+      e.printStackTrace();
+    } finally
+    {
+      closeStream(zipStream);
+    }
+  }
+
+  /**
+   * Closes the input stream, swallowing all exceptions
+   * 
+   * @param is
+   */
+  protected void closeStream(InputStream is)
+  {
+    if (is != null)
+    {
+      try
+      {
+        is.close();
+      } catch (IOException e)
+      {
+        // ignore
+      }
+    }
+  }
+
+  /**
+   * Reads, parses and stores the OBO file data
+   * 
+   * @param is
+   * @throws ParseException
+   * @throws IOException
+   */
+  protected void loadOboFile(InputStream is) throws ParseException,
+          IOException
+  {
+    BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
+    OboParser parser = new OboParser();
+    ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
+    isA = ontology.getTerm("is_a");
+    storeTermNames();
+  }
+
+  /**
+   * Stores a lookup table of terms by description. Note that description is not
+   * guaranteed unique. Where duplicate descriptions are found, try to discard
+   * the term that is flagged as obsolete. However we do store obsolete terms
+   * where there is no duplication of description.
+   */
+  protected void storeTermNames()
+  {
+    for (Term term : ontology.getTerms())
+    {
+      if (term instanceof Impl)
+      {
+        String description = term.getDescription();
+        if (description != null)
+        {
+          Term replaced = termsByDescription.get(description);
+          if (replaced != null)
+          {
+            boolean newTermIsObsolete = isObsolete(term);
+            boolean oldTermIsObsolete = isObsolete(replaced);
+            if (newTermIsObsolete && !oldTermIsObsolete)
+            {
+              System.err.println("Ignoring " + term.getName()
+                      + " as obsolete and duplicated by "
+                      + replaced.getName());
+              term = replaced;
+            }
+            else if (!newTermIsObsolete && oldTermIsObsolete)
+            {
+              System.err.println("Ignoring " + replaced.getName()
+                      + " as obsolete and duplicated by " + term.getName());
+            }
+            else
+            {
+            System.err.println("Warning: " + term.getName()
+                    + " has replaced " + replaced.getName()
+                    + " for lookup of '" + description + "'");
+            }
+          }
+          termsByDescription.put(description, term);
+        }
+      }
+    }
+  }
+
+  /**
+   * Answers true if the term has property "is_obsolete" with value true, else
+   * false
+   * 
+   * @param term
+   * @return
+   */
+  public static boolean isObsolete(Term term)
+  {
+    Annotation ann = term.getAnnotation();
+    if (ann != null)
+    {
+      try
+      {
+      if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
+      {
+          return true;
+        }
+      } catch (NoSuchElementException e)
+      {
+        // fall through to false
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Test whether the given Sequence Ontology term is nucleotide_match (either
+   * directly or via is_a relationship)
+   * 
+   * @param soTerm
+   *          SO name or description
+   * @return
+   */
+  public boolean isNucleotideMatch(String soTerm)
+  {
+    return isA(soTerm, "nucleotide_match");
+  }
+
+  /**
+   * Test whether the given Sequence Ontology term is protein_match (either
+   * directly or via is_a relationship)
+   * 
+   * @param soTerm
+   *          SO name or description
+   * @return
+   */
+  public boolean isProteinMatch(String soTerm)
+  {
+    return isA(soTerm, "protein_match");
+  }
+
+  /**
+   * Test whether the given Sequence Ontology term is polypeptide (either
+   * directly or via is_a relationship)
+   * 
+   * @param soTerm
+   *          SO name or description
+   * @return
+   */
+  public boolean isPolypeptide(String soTerm)
+  {
+    return isA(soTerm, "polypeptide");
+  }
+
+  /**
+   * Returns true if the given term has a (direct or indirect) 'isA'
+   * relationship with the parent
+   * 
+   * @param child
+   * @param parent
+   * @return
+   */
+  public boolean isA(String child, String parent)
+  {
+    /*
+     * optimise trivial checks like isA("CDS", "CDS")
+     */
+    if (child.equals(parent))
+    {
+      return true;
+    }
+
+    Term childTerm = getTerm(child);
+    Term parentTerm = getTerm(parent);
+
+    return termIsA(childTerm, parentTerm);
+  }
+
+  /**
+   * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
+   * 
+   * @param childTerm
+   * @param parentTerm
+   * @return
+   */
+  protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
+  {
+    /*
+     * null term could arise from a misspelled SO description
+     */
+    if (childTerm == null || parentTerm == null)
+    {
+      return false;
+    }
+
+    /*
+     * recursive search endpoint:
+     */
+    if (childTerm == parentTerm)
+    {
+      return true;
+    }
+
+    /*
+     * lazy initialisation - find all of a term's parents (recursively) 
+     * the first time this is called, and save them in a map.
+     */
+    if (!termIsA.containsKey(childTerm))
+    {
+      findParents(childTerm);
+    }
+
+    List<Term> parents = termIsA.get(childTerm);
+    for (Term parent : parents)
+    {
+      if (termIsA(parent, parentTerm))
+      {
+        /*
+         * add (great-)grandparents to parents list as they are discovered,
+         * for faster lookup next time
+         */
+        if (!parents.contains(parentTerm))
+        {
+          parents.add(parentTerm);
+        }
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
+   * empty) list.
+   * 
+   * @param childTerm
+   */
+  protected synchronized void findParents(Term childTerm)
+  {
+    List<Term> result = new ArrayList<Term>();
+    for (Triple triple : ontology.getTriples(childTerm, null, isA))
+    {
+      Term parent = triple.getObject();
+      result.add(parent);
+
+      /*
+       * and search for the parent's parents recursively
+       */
+      findParents(parent);
+    }
+    termIsA.put(childTerm, result);
+  }
+
+  /**
+   * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
+   * "sequence_location"), or null if not found.
+   * 
+   * @param child
+   * @return
+   */
+  protected Term getTerm(String nameOrDescription)
+  {
+    Term t = termsByDescription.get(nameOrDescription);
+    if (t == null)
+    {
+      try
+      {
+        t = ontology.getTerm(nameOrDescription);
+      } catch (NoSuchElementException e)
+      {
+        // not found
+      }
+    }
+    return t;
+  }
+
+  public boolean isSequenceVariant(String soTerm)
+  {
+    return isA(soTerm, "sequence_variant");
+  }
+}
index d613796..817ba9c 100644 (file)
@@ -25,8 +25,10 @@ import jalview.datamodel.SequenceI;
 import jalview.io.NewickFile;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.List;
+import java.util.Map;
 
 public class JalviewDataset
 {
@@ -55,7 +57,7 @@ public class JalviewDataset
   /**
    * @return the featureColours
    */
-  public Hashtable getFeatureColours()
+  public Map<String, Object> getFeatureColours()
   {
     return featureColours;
   }
@@ -64,7 +66,7 @@ public class JalviewDataset
    * @param featureColours
    *          the featureColours to set
    */
-  public void setFeatureColours(Hashtable featureColours)
+  public void setFeatureColours(Map<String, Object> featureColours)
   {
     this.featureColours = featureColours;
   }
@@ -185,7 +187,7 @@ public class JalviewDataset
   /**
    * current set of feature colours
    */
-  Hashtable featureColours;
+  Map<String, Object> featureColours;
 
   /**
    * original identity of each sequence in results
@@ -199,7 +201,7 @@ public class JalviewDataset
     seqDetails = new Hashtable();
     al = new ArrayList<AlignmentSet>();
     parentDataset = null;
-    featureColours = new Hashtable();
+    featureColours = new HashMap<String, Object>();
   }
 
   /**
@@ -207,9 +209,10 @@ public class JalviewDataset
    * 
    * @param parentAlignment
    */
-  public JalviewDataset(AlignmentI aldataset, Hashtable fc,
+  public JalviewDataset(AlignmentI aldataset, Map<String, Object> fc,
           Hashtable seqDets)
   {
+    // TODO not used - remove?
     this(aldataset, fc, seqDets, null);
   }
 
@@ -228,7 +231,7 @@ public class JalviewDataset
    *          (may be null) alignment to associate new annotation and trees
    *          with.
    */
-  public JalviewDataset(AlignmentI aldataset, Hashtable fc,
+  public JalviewDataset(AlignmentI aldataset, Map<String, Object> fc,
           Hashtable seqDets, AlignmentI parentAlignment)
   {
     this();
index a4ef77e..01369b9 100644 (file)
@@ -30,7 +30,7 @@ import jalview.io.packed.DataProvider.JvDataType;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.List;
 
 public class ParsePackedSet
@@ -66,7 +66,7 @@ public class ParsePackedSet
         String fmt = null;
         try
         {
-          fmt = new IdentifyFile().Identify(src, false);
+          fmt = new IdentifyFile().identify(src, false);
         } catch (Exception ex)
         {
           exerror = ex;
@@ -157,7 +157,7 @@ public class ParsePackedSet
         // if not, create one.
         if (context.featureColours == null)
         {
-          context.featureColours = new Hashtable();
+          context.featureColours = new HashMap<String, Object>();
         }
         try
         {
index 6e93f45..61491b2 100644 (file)
@@ -27,6 +27,7 @@ import jalview.io.VamsasAppDatastore;
 import jalview.util.UrlLink;
 
 import java.util.Enumeration;
+import java.util.Iterator;
 import java.util.Vector;
 
 import uk.ac.vamsas.objects.core.DataSetAnnotations;
@@ -71,6 +72,7 @@ public class Sequencefeature extends Rangetype
     doJvUpdate();
   }
 
+  @Override
   public void addToDocument()
   {
     DataSetAnnotations dsa = (DataSetAnnotations) vobj;
@@ -89,6 +91,7 @@ public class Sequencefeature extends Rangetype
     dataset.addDataSetAnnotations(dsa);
   }
 
+  @Override
   public void addFromDocument()
   {
     DataSetAnnotations dsa = (DataSetAnnotations) vobj;
@@ -106,6 +109,7 @@ public class Sequencefeature extends Rangetype
     bindjvvobj(sf, dsa);
   }
 
+  @Override
   public void conflict()
   {
     log.warn("Untested sequencefeature conflict code");
@@ -118,6 +122,7 @@ public class Sequencefeature extends Rangetype
     addToDocument(); // and create a new feature in the document
   }
 
+  @Override
   public void updateToDoc()
   {
     DataSetAnnotations dsa = (DataSetAnnotations) vobj;
@@ -144,6 +149,7 @@ public class Sequencefeature extends Rangetype
 
   }
 
+  @Override
   public void updateFromDoc()
   {
     DataSetAnnotations dsa = (DataSetAnnotations) vobj;
@@ -229,11 +235,11 @@ public class Sequencefeature extends Rangetype
     }
     if (feature.otherDetails != null)
     {
-      Enumeration iter = feature.otherDetails.keys();
+      Iterator<String> iter = feature.otherDetails.keySet().iterator();
       Vector props = dsa.getPropertyAsReference();
-      while (iter.hasMoreElements())
+      while (iter.hasNext())
       {
-        String key = (String) iter.nextElement();
+        String key = iter.next();
         if (!key.equalsIgnoreCase("score")
                 && !key.equalsIgnoreCase("status"))
         {
index 73d34c2..205b9c6 100755 (executable)
@@ -173,6 +173,10 @@ public class GAlignFrame extends JInternalFrame
 
   protected JMenuItem showTranslation = new JMenuItem();
 
+  protected JMenuItem showReverse = new JMenuItem();
+
+  protected JMenuItem showReverseComplement = new JMenuItem();
+
   protected JMenu showProducts = new JMenu();
 
   protected JMenuItem rnahelicesColour = new JMenuItem();
@@ -1686,6 +1690,25 @@ public class GAlignFrame extends JInternalFrame
         showTranslation_actionPerformed(e);
       }
     });
+    showReverse.setText(MessageManager.getString("label.reverse"));
+    showReverse.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        showReverse_actionPerformed(false);
+      }
+    });
+    showReverseComplement.setText(MessageManager
+            .getString("label.reverse_complement"));
+    showReverseComplement.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        showReverse_actionPerformed(true);
+      }
+    });
 
     JMenuItem extractScores = new JMenuItem(
             MessageManager.getString("label.extract_scores"));
@@ -2252,6 +2275,8 @@ public class GAlignFrame extends JInternalFrame
     calculateMenu.add(PCAMenuItem);
     calculateMenu.addSeparator();
     calculateMenu.add(showTranslation);
+    calculateMenu.add(showReverse);
+    calculateMenu.add(showReverseComplement);
     calculateMenu.add(showProducts);
     calculateMenu.add(autoCalculate);
     calculateMenu.add(sortByTree);
@@ -2320,6 +2345,16 @@ public class GAlignFrame extends JInternalFrame
   }
 
   /**
+   * Generate the reverse sequence (or reverse complement if the flag is true)
+   * and add it to the alignment
+   * 
+   * @param complement
+   */
+  protected void showReverse_actionPerformed(boolean complement)
+  {
+  }
+
+  /**
    * Adds the given action listener and key accelerator to the given menu item.
    * Also saves in a lookup table to support lookup of action by key stroke.
    * 
@@ -3107,7 +3142,7 @@ public class GAlignFrame extends JInternalFrame
     return this.splitFrame;
   }
 
-  protected void showComplement_actionPerformed(boolean state)
+  protected void showComplement_actionPerformed(boolean complement)
   {
   }
 }
index de99987..5eb2524 100755 (executable)
@@ -35,7 +35,7 @@ import java.util.Vector;
 
 public class ResidueProperties
 {
-  public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable();
+  public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable<String, ScoreModelI>();
 
   // Stores residue codes/names and colours and other things
   public static final int[] aaIndex; // aaHash version 2.1.1 and below
index 7aff05a..b1e4d58 100755 (executable)
@@ -67,7 +67,7 @@ public class UserColourScheme extends ResidueColourScheme
 
     if (col == null)
     {
-      System.out.println("Unknown colour!! " + colour);
+      System.out.println("Making colour from name: " + colour);
       col = createColourFromName(colour);
     }
 
index 2f962b5..871f076 100644 (file)
@@ -47,10 +47,8 @@ import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.IdentityHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Vector;
 
 import MCview.Atom;
@@ -78,7 +76,7 @@ public class StructureSelectionManager
   /*
    * Set of any registered mappings between (dataset) sequences.
    */
-  public Set<AlignedCodonFrame> seqmappings = new LinkedHashSet<AlignedCodonFrame>();
+  private List<AlignedCodonFrame> seqmappings = new ArrayList<AlignedCodonFrame>();
 
   private List<CommandListener> commandListeners = new ArrayList<CommandListener>();
 
@@ -1017,13 +1015,13 @@ public class StructureSelectionManager
   /**
    * Add each of the given codonFrames to the stored set, if not aready present.
    * 
-   * @param set
+   * @param mappings
    */
-  public void registerMappings(Set<AlignedCodonFrame> set)
+  public void registerMappings(List<AlignedCodonFrame> mappings)
   {
-    if (set != null)
+    if (mappings != null)
     {
-      for (AlignedCodonFrame acf : set)
+      for (AlignedCodonFrame acf : mappings)
       {
         registerMapping(acf);
       }
@@ -1261,4 +1259,9 @@ public class StructureSelectionManager
     progressIndicator.setProgressBar(message, progressSessionId);
   }
 
+  public List<AlignedCodonFrame> getSequenceMappings()
+  {
+    return seqmappings;
+  }
+
 }
index 518c310..c85a489 100755 (executable)
@@ -27,14 +27,21 @@ import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
 
 import com.stevesoft.pat.Regex;
 
+/**
+ * Utilities for handling DBRef objects and their collections.
+ */
 public class DBRefUtils
 {
+  /*
+   * lookup from lower-case form of a name to its canonical (standardised) form
+   */
   private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
 
   private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
@@ -46,6 +53,7 @@ public class DBRefUtils
             DBRefSource.UNIPROT);
     canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);
     canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
+    canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
 
     dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
     dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
@@ -54,39 +62,33 @@ public class DBRefUtils
   }
 
   /**
-   * Utilities for handling DBRef objects and their collections.
-   */
-  /**
    * 
    * @param dbrefs
-   *          Vector of DBRef objects to search
+   *          array of DBRef objects to search
    * @param sources
    *          String[] array of source DBRef IDs to retrieve
-   * @return Vector
+   * @return
    */
   public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
           String[] sources)
   {
-    if (dbrefs == null)
-    {
-      return null;
-    }
-    if (sources == null)
+    if (dbrefs == null || sources == null)
     {
       return dbrefs;
     }
-    Map<String, Integer> srcs = new HashMap<String, Integer>();
-    ArrayList<DBRefEntry> res = new ArrayList<DBRefEntry>();
-
-    for (int i = 0; i < sources.length; i++)
+    HashSet<String> srcs = new HashSet<String>();
+    for (String src : sources)
     {
-      srcs.put(new String(sources[i]), new Integer(i));
+      srcs.add(src);
     }
-    for (int i = 0, j = dbrefs.length; i < j; i++)
+
+    List<DBRefEntry> res = new ArrayList<DBRefEntry>();
+    for (DBRefEntry dbr : dbrefs)
     {
-      if (srcs.containsKey(dbrefs[i].getSource()))
+      String source = getCanonicalName(dbr.getSource());
+      if (srcs.contains(source))
       {
-        res.add(dbrefs[i]);
+        res.add(dbr);
       }
     }
 
@@ -95,8 +97,6 @@ public class DBRefUtils
       DBRefEntry[] reply = new DBRefEntry[res.size()];
       return res.toArray(reply);
     }
-    res = null;
-    // there are probable memory leaks in the hashtable!
     return null;
   }
 
@@ -200,8 +200,10 @@ public class DBRefUtils
   /**
    * match on all non-null fields in refa
    */
+  // TODO unused - remove?
   public static DbRefComp matchNonNullonA = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() == null
@@ -230,27 +232,18 @@ public class DBRefUtils
    * either field is null or field matches for all of source, version, accession
    * id and map.
    */
+  // TODO unused - remove?
   public static DbRefComp matchEitherNonNull = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if ((refa.getSource() == null || refb.getSource() == null)
-              || refb.getSource().equals(refa.getSource()))
+      if (nullOrEqual(refa.getSource(), refb.getSource())
+              && nullOrEqual(refa.getVersion(), refb.getVersion())
+              && nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
+              && nullOrEqual(refa.getMap(), refb.getMap()))
       {
-        if ((refa.getVersion() == null || refb.getVersion() == null)
-                || refb.getVersion().equals(refa.getVersion()))
-        {
-          if ((refa.getAccessionId() == null || refb.getAccessionId() == null)
-                  || refb.getAccessionId().equals(refa.getAccessionId()))
-          {
-            if ((refa.getMap() == null || refb.getMap() == null)
-                    || (refb.getMap() != null && refb.getMap().equals(
-                            refa.getMap())))
-            {
-              return true;
-            }
-          }
-        }
+        return true;
       }
       return false;
     }
@@ -260,18 +253,18 @@ public class DBRefUtils
    * accession ID and DB must be identical. Version is ignored. Map is either
    * not defined or is a match (or is compatible?)
    */
+  // TODO unused - remove?
   public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
               && refb.getSource().equals(refa.getSource()))
       {
         // We dont care about version
-        // if ((refa.getVersion()==null || refb.getVersion()==null)
-        // || refb.getVersion().equals(refa.getVersion()))
-        // {
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
+        // FIXME should be && not || here?
                 || refb.getAccessionId().equals(refa.getAccessionId()))
         {
           if ((refa.getMap() == null || refb.getMap() == null)
@@ -291,17 +284,16 @@ public class DBRefUtils
    * or map but no maplist on either or maplist of map on a is the complement of
    * maplist of map on b.
    */
+  // TODO unused - remove?
   public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
               && refb.getSource().equals(refa.getSource()))
       {
         // We dont care about version
-        // if ((refa.getVersion()==null || refb.getVersion()==null)
-        // || refb.getVersion().equals(refa.getVersion()))
-        // {
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
                 || refb.getAccessionId().equals(refa.getAccessionId()))
         {
@@ -328,8 +320,10 @@ public class DBRefUtils
    * or or map but no maplist on either or maplist of map on a is equivalent to
    * the maplist of map on b.
    */
+  // TODO unused - remove?
   public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
@@ -368,17 +362,13 @@ public class DBRefUtils
    */
   public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
   {
+    @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      // System.err.println("Comparing A: "+refa.getSrcAccString()+(refa.hasMap()?" has map.":"."));
-      // System.err.println("Comparing B: "+refb.getSrcAccString()+(refb.hasMap()?" has map.":"."));
       if (refa.getSource() != null && refb.getSource() != null
               && refb.getSource().equals(refa.getSource()))
       {
         // We dont care about version
-        // if ((refa.getVersion()==null || refb.getVersion()==null)
-        // || refb.getVersion().equals(refa.getVersion()))
-        // {
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
                 && refb.getAccessionId().equals(refa.getAccessionId()))
         {
@@ -472,4 +462,20 @@ public class DBRefUtils
     return ref;
   }
 
+  /**
+   * Returns true if either object is null, or if both are non-null and equal
+   * according to {@code o1.equals(o2)}.
+   * 
+   * @param o1
+   *          first object to compare (may be null)
+   * @param o2
+   *          second object to compare (may be null)
+   * @return true if at least one argument is null, or the two are equal
+   */
+  public static boolean nullOrEqual(Object o1, Object o2)
+  {
+    if (o1 == null || o2 == null)
+    {
+      return true;
+    }
+    // both are non-null here, so no further null handling is needed
+    return o1.equals(o2);
+  }
+
 }
index e456dab..bf66b91 100644 (file)
@@ -41,12 +41,12 @@ public class MapList
   /*
    * Subregions (base 1) described as { [start1, end1], [start2, end2], ...}
    */
-  private List<int[]> fromShifts = new ArrayList<int[]>();
+  private List<int[]> fromShifts;
 
   /*
    * Same format as fromShifts, for the 'mapped to' sequence
    */
-  private List<int[]> toShifts = new ArrayList<int[]>();
+  private List<int[]> toShifts;
 
   /*
    * number of steps in fromShifts to one toRatio unit
@@ -73,6 +73,15 @@ public class MapList
   private int toHighest;
 
   /**
+   * Constructor
+   */
+  public MapList()
+  {
+    fromShifts = new ArrayList<int[]>();
+    toShifts = new ArrayList<int[]>();
+  }
+
+  /**
    * Two MapList objects are equal if they are the same object, or they both
    * have populated shift ranges and all values are the same.
    */
@@ -180,7 +189,9 @@ public class MapList
   }
 
   /**
-   * Constructor.
+   * Constructor given from and to ranges as [start1, end1, start2, end2,...].
+   * If any end is equal to the next start, the ranges will be merged. There is
+   * no validation check that the ranges do not overlap each other.
    * 
    * @param from
    *          contiguous regions as [start1, end1, start2, end2, ...]
@@ -193,25 +204,51 @@ public class MapList
    */
   public MapList(int from[], int to[], int fromRatio, int toRatio)
   {
+    this();
     this.fromRatio = fromRatio;
     this.toRatio = toRatio;
-    fromLowest = from[0];
-    fromHighest = from[1];
+    fromLowest = Integer.MAX_VALUE;
+    fromHighest = Integer.MIN_VALUE;
+    int added = 0;
+
     for (int i = 0; i < from.length; i += 2)
     {
-      fromLowest = Math.min(fromLowest, from[i]);
-      fromHighest = Math.max(fromHighest, from[i + 1]);
-
-      fromShifts.add(new int[] { from[i], from[i + 1] });
+      /*
+       * note lowest and highest values - bearing in mind the
+       * direction may be reversed
+       */
+      fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1]));
+      fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1]));
+      if (added > 0 && from[i] == fromShifts.get(added - 1)[1])
+      {
+        /*
+         * this range starts where the last ended - just extend it
+         */
+        fromShifts.get(added - 1)[1] = from[i + 1];
+      }
+      else
+      {
+        fromShifts.add(new int[] { from[i], from[i + 1] });
+        added++;
+      }
     }
 
-    toLowest = to[0];
-    toHighest = to[1];
+    toLowest = Integer.MAX_VALUE;
+    toHighest = Integer.MIN_VALUE;
+    added = 0;
     for (int i = 0; i < to.length; i += 2)
     {
-      toLowest = Math.min(toLowest, to[i]);
-      toHighest = Math.max(toHighest, to[i + 1]);
-      toShifts.add(new int[] { to[i], to[i + 1] });
+      toLowest = Math.min(toLowest, Math.min(to[i], to[i + 1]));
+      toHighest = Math.max(toHighest, Math.max(to[i], to[i + 1]));
+      if (added > 0 && to[i] == toShifts.get(added - 1)[1])
+      {
+        toShifts.get(added - 1)[1] = to[i + 1];
+      }
+      else
+      {
+        toShifts.add(new int[] { to[i], to[i + 1] });
+        added++;
+      }
     }
   }
 
@@ -222,6 +259,7 @@ public class MapList
    */
   public MapList(MapList map)
   {
+    this();
     // TODO not used - remove?
     this.fromLowest = map.fromLowest;
     this.fromHighest = map.fromHighest;
@@ -247,7 +285,8 @@ public class MapList
   }
 
   /**
-   * Constructor given ranges as lists of [start, end] positions
+   * Constructor given ranges as lists of [start, end] positions. There is no
+   * validation check that the ranges do not overlap each other.
    * 
    * @param fromRange
    * @param toRange
@@ -257,26 +296,78 @@ public class MapList
   public MapList(List<int[]> fromRange, List<int[]> toRange, int fromRatio,
           int toRatio)
   {
+    this();
+    fromRange = coalesceRanges(fromRange);
+    toRange = coalesceRanges(toRange);
     this.fromShifts = fromRange;
     this.toShifts = toRange;
     this.fromRatio = fromRatio;
     this.toRatio = toRatio;
 
     fromLowest = Integer.MAX_VALUE;
-    fromHighest = 0;
+    fromHighest = Integer.MIN_VALUE;
     for (int[] range : fromRange)
     {
-      fromLowest = Math.min(fromLowest, range[0]);
-      fromHighest = Math.max(fromHighest, range[1]);
+      fromLowest = Math.min(fromLowest, Math.min(range[0], range[1]));
+      fromHighest = Math.max(fromHighest, Math.max(range[0], range[1]));
     }
 
     toLowest = Integer.MAX_VALUE;
-    toHighest = 0;
+    toHighest = Integer.MIN_VALUE;
     for (int[] range : toRange)
     {
-      toLowest = Math.min(toLowest, range[0]);
-      toHighest = Math.max(toHighest, range[1]);
+      toLowest = Math.min(toLowest, Math.min(range[0], range[1]));
+      toHighest = Math.max(toHighest, Math.max(range[0], range[1]));
+    }
+  }
+
+  /**
+   * Consolidates a list of [start, end] ranges so that any contiguous ranges
+   * are merged. A range is merged into the preceding one if it runs in the
+   * same direction (a single-position range adopts the preceding direction)
+   * and its start either equals the preceding end or is one step beyond it.
+   * The ranges are processed in list order; they are not sorted, and no check
+   * is made for overlaps.
+   * <p>
+   * The input list and the arrays it holds are never modified: merging is
+   * performed on copies.
+   * 
+   * @param ranges
+   *          the ranges to consolidate (may be null)
+   * @return the input list itself if it is null, has fewer than two entries,
+   *         or nothing could be merged; otherwise a new list of merged ranges
+   */
+  public static List<int[]> coalesceRanges(List<int[]> ranges)
+  {
+    if (ranges == null || ranges.size() < 2)
+    {
+      return ranges;
+    }
+
+    boolean changed = false;
+    List<int[]> merged = new ArrayList<int[]>();
+
+    /*
+     * work on a copy of each range, so that extending a merged range
+     * does not write into the caller's arrays
+     */
+    int[] first = ranges.get(0);
+    int[] lastRange = new int[] { first[0], first[1] };
+    int lastDirection = lastRange[1] >= lastRange[0] ? 1 : -1;
+    merged.add(lastRange);
+
+    for (int i = 1; i < ranges.size(); i++)
+    {
+      int[] range = ranges.get(i);
+      int direction = range[1] >= range[0] ? 1 : -1;
+
+      /*
+       * if next range is in the same direction as last and contiguous,
+       * just update the end position of the last range
+       */
+      boolean sameDirection = range[1] == range[0]
+              || direction == lastDirection;
+      boolean contiguous = range[0] == lastRange[1]
+              || range[0] == lastRange[1] + lastDirection;
+      if (sameDirection && contiguous)
+      {
+        lastRange[1] = range[1];
+        changed = true;
+      }
+      else
+      {
+        lastRange = new int[] { range[0], range[1] };
+        merged.add(lastRange);
+        // careful: merging [5, 5] after [7, 6] should keep negative direction
+        lastDirection = (range[1] == range[0]) ? lastDirection : direction;
+      }
+    }
+
+    return changed ? merged : ranges;
+  }
 
   /**
@@ -849,8 +940,7 @@ public class MapList
   public String toString()
   {
     StringBuilder sb = new StringBuilder(64);
-    sb.append("From (").append(fromRatio).append(":").append(toRatio)
-            .append(") [");
+    sb.append("[");
     for (int[] shift : fromShifts)
     {
       sb.append(" ").append(Arrays.toString(shift));
@@ -863,4 +953,102 @@ public class MapList
     sb.append(" ]");
     return sb.toString();
   }
+
+  /**
+   * Appends the 'from' and 'to' ranges of the given map to this map list, and
+   * widens this map's lowest and highest bounds to include those of the given
+   * map. Each range is passed to addRange, so a range that is contiguous with
+   * the last stored range is merged into it. No check is made that the added
+   * ranges do not overlap the existing ranges, or each other.
+   * 
+   * @param map
+   *          the map list whose ranges are to be added
+   */
+  public void addMapList(MapList map)
+  {
+    fromLowest = Math.min(fromLowest, map.fromLowest);
+    fromHighest = Math.max(fromHighest, map.fromHighest);
+    toLowest = Math.min(toLowest, map.toLowest);
+    toHighest = Math.max(toHighest, map.toHighest);
+
+    for (int[] fromRange : map.getFromRanges())
+    {
+      addRange(fromRange, fromShifts);
+    }
+
+    for (int[] toRange : map.getToRanges())
+    {
+      addRange(toRange, toShifts);
+    }
+  }
+
+  /**
+   * Adds the given [start, end] range to the end of the list or, if it
+   * continues the last range in the list, extends that last range instead. A
+   * range continues the last one if both run in the same direction
+   * ({@code end >= start} counts as forward) and the new start either equals
+   * the last end or is one step beyond it in that direction.
+   * <p>
+   * Note that the supplied array is stored as is (not copied) when it is
+   * added, and the last array in the list is modified in place when it is
+   * extended.
+   * 
+   * @param range
+   *          the [start, end] range to add
+   * @param addTo
+   *          the list of ranges to update
+   */
+  public static void addRange(int[] range, List<int[]> addTo)
+  {
+    if (!addTo.isEmpty())
+    {
+      int[] previous = addTo.get(addTo.size() - 1);
+      boolean previousForward = previous[1] >= previous[0];
+      boolean rangeForward = range[1] >= range[0];
+      int step = previousForward ? 1 : -1;
+
+      /*
+       * same direction, and starting at (or one step past) the previous
+       * end - extend the previous range rather than adding a new one
+       */
+      if (previousForward == rangeForward
+              && (range[0] == previous[1] || range[0] == previous[1] + step))
+      {
+        previous[1] = range[1];
+        return;
+      }
+    }
+
+    /*
+     * empty list, or not contiguous with the last range - add a new entry
+     */
+    addTo.add(range);
+  }
+
+  /**
+   * Answers true if the mapping is from the forward strand, false if it is
+   * from the reverse strand. The answer is decided by the first 'from' range
+   * whose start and end differ: true if its end is greater than its start,
+   * else false. If there is no such range, the answer defaults to true.
+   * Behaviour is not well defined if the mapping has a mixture of forward and
+   * reverse ranges.
+   * 
+   * @return
+   */
+  public boolean isFromForwardStrand()
+  {
+    for (int[] range : getFromRanges())
+    {
+      if (range[1] != range[0])
+      {
+        // first range that is not a single position settles the direction
+        return range[1] > range[0];
+      }
+    }
+    // no range with a direction found - default to forward strand
+    return true;
+  }
 }
index 45d166d..1bbfc73 100644 (file)
@@ -38,12 +38,11 @@ import jalview.datamodel.SequenceGroup;
 import jalview.datamodel.SequenceI;
 
 import java.util.ArrayList;
-import java.util.Collections;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 /**
  * Helper methods for manipulations involving sequence mappings.
@@ -69,7 +68,7 @@ public final class MappingUtils
    */
   protected static void mapCutOrPaste(Edit edit, boolean undo,
           List<SequenceI> targetSeqs, EditCommand result,
-          Set<AlignedCodonFrame> mappings)
+          List<AlignedCodonFrame> mappings)
   {
     Action action = edit.getAction();
     if (undo)
@@ -93,7 +92,7 @@ public final class MappingUtils
    */
   public static EditCommand mapEditCommand(EditCommand command,
           boolean undo, final AlignmentI mapTo, char gapChar,
-          Set<AlignedCodonFrame> mappings)
+          List<AlignedCodonFrame> mappings)
   {
     /*
      * For now, only support mapping from protein edits to cDna
@@ -165,7 +164,7 @@ public final class MappingUtils
           Map<SequenceI, SequenceI> originalSequences,
           final List<SequenceI> targetSeqs,
           Map<SequenceI, SequenceI> targetCopies, char gapChar,
-          EditCommand result, Set<AlignedCodonFrame> mappings)
+          EditCommand result, List<AlignedCodonFrame> mappings)
   {
     Action action = edit.getAction();
 
@@ -268,7 +267,7 @@ public final class MappingUtils
    * @return
    */
   public static SearchResults buildSearchResults(SequenceI seq, int index,
-          Set<AlignedCodonFrame> seqmappings)
+          List<AlignedCodonFrame> seqmappings)
   {
     SearchResults results = new SearchResults();
     addSearchResults(results, seq, index, seqmappings);
@@ -285,7 +284,7 @@ public final class MappingUtils
    * @param seqmappings
    */
   public static void addSearchResults(SearchResults results, SequenceI seq,
-          int index, Set<AlignedCodonFrame> seqmappings)
+          int index, List<AlignedCodonFrame> seqmappings)
   {
     if (index >= seq.getStart() && index <= seq.getEnd())
     {
@@ -314,7 +313,7 @@ public final class MappingUtils
      */
     boolean targetIsNucleotide = mapTo.isNucleotide();
     AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
-    Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+    List<AlignedCodonFrame> codonFrames = protein.getAlignment()
             .getCodonFrames();
     /*
      * Copy group name, colours etc, but not sequences or sequence colour scheme
@@ -375,15 +374,15 @@ public final class MappingUtils
               /*
                * Found a sequence mapping. Locate the start/end mapped residues.
                */
+              List<AlignedCodonFrame> mapping = Arrays.asList(new AlignedCodonFrame[] { acf });
               SearchResults sr = buildSearchResults(selected,
-                      startResiduePos, Collections.singleton(acf));
+                      startResiduePos, mapping);
               for (Match m : sr.getResults())
               {
                 mappedStartResidue = m.getStart();
                 mappedEndResidue = m.getEnd();
               }
-              sr = buildSearchResults(selected, endResiduePos,
-                      Collections.singleton(acf));
+              sr = buildSearchResults(selected, endResiduePos, mapping);
               for (Match m : sr.getResults())
               {
                 mappedStartResidue = Math.min(mappedStartResidue,
@@ -428,7 +427,7 @@ public final class MappingUtils
    * @return
    */
   public static CommandI mapOrderCommand(OrderCommand command,
-          boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
+          boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
   {
     SequenceI[] sortOrder = command.getSequenceOrder(undo);
     List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
@@ -512,7 +511,7 @@ public final class MappingUtils
   {
     boolean targetIsNucleotide = mapTo.isNucleotide();
     AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
-    Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+    List<AlignedCodonFrame> codonFrames = protein.getAlignment()
             .getCodonFrames();
     ColumnSelection mappedColumns = new ColumnSelection();
 
@@ -523,8 +522,6 @@ public final class MappingUtils
 
     char fromGapChar = mapFrom.getAlignment().getGapCharacter();
 
-    // FIXME allow for hidden columns
-
     /*
      * For each mapped column, find the range of columns that residues in that
      * column map to.
@@ -558,7 +555,7 @@ public final class MappingUtils
    * @param fromGapChar
    */
   protected static void mapHiddenColumns(int[] hidden,
-          Set<AlignedCodonFrame> mappings,
+          List<AlignedCodonFrame> mappings,
           ColumnSelection mappedColumns, List<SequenceI> fromSequences,
           List<SequenceI> toSequences, char fromGapChar)
   {
@@ -591,7 +588,8 @@ public final class MappingUtils
    * @param toSequences
    * @param fromGapChar
    */
-  protected static void mapColumn(int col, Set<AlignedCodonFrame> mappings,
+  protected static void mapColumn(int col,
+          List<AlignedCodonFrame> mappings,
           ColumnSelection mappedColumns, List<SequenceI> fromSequences,
           List<SequenceI> toSequences, char fromGapChar)
   {
@@ -625,7 +623,7 @@ public final class MappingUtils
    * @return
    */
   protected static int[] findMappedColumns(int col,
-          Set<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
+          List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
           List<SequenceI> toSequences, char fromGapChar)
   {
     int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
@@ -679,8 +677,8 @@ public final class MappingUtils
   }
 
   /**
-   * Returns the mapped codon for a given aligned sequence column position (base
-   * 0).
+   * Returns the mapped codon or codons for a given aligned sequence column
+   * position (base 0).
    * 
    * @param seq
    *          an aligned peptide sequence
@@ -688,26 +686,32 @@ public final class MappingUtils
    *          an aligned column position (base 0)
    * @param mappings
    *          a set of codon mappings
-   * @return the bases of the mapped codon in the cDNA dataset sequence, or null
-   *         if not found
+   * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
+   *         or an empty list if none found
    */
-  public static char[] findCodonFor(SequenceI seq, int col,
-          Set<AlignedCodonFrame> mappings)
+  public static List<char[]> findCodonsFor(SequenceI seq, int col,
+          List<AlignedCodonFrame> mappings)
   {
+    List<char[]> result = new ArrayList<char[]>();
     int dsPos = seq.findPosition(col);
     for (AlignedCodonFrame mapping : mappings)
     {
       if (mapping.involvesSequence(seq))
       {
-        return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
+        List<char[]> codons = mapping.getMappedCodons(
+                seq.getDatasetSequence(), dsPos);
+        if (codons != null)
+        {
+          result.addAll(codons);
+        }
       }
     }
-    return null;
+    return result;
   }
 
   /**
-   * Converts a series of [start, end] ranges into an array of individual
-   * positions.
+   * Converts a series of [start, end] range pairs into an array of individual
+   * positions. This also caters for 'reverse strand' (start > end) cases.
    * 
    * @param ranges
    * @return
@@ -720,17 +724,21 @@ public final class MappingUtils
     int count = 0;
     for (int i = 0; i < ranges.length - 1; i += 2)
     {
-      count += ranges[i + 1] - ranges[i] + 1;
+      count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
     }
 
     int[] result = new int[count];
     int k = 0;
     for (int i = 0; i < ranges.length - 1; i += 2)
     {
-      for (int j = ranges[i]; j <= ranges[i + 1]; j++)
+      int from = ranges[i];
+      final int to = ranges[i + 1];
+      int step = from <= to ? 1 : -1;
+      do
       {
-        result[k++] = j;
-      }
+        result[k++] = from;
+        from += step;
+      } while (from != to + step);
     }
     return result;
   }
@@ -744,7 +752,7 @@ public final class MappingUtils
    * @return
    */
   public static List<AlignedCodonFrame> findMappingsForSequence(
-          SequenceI sequence, Set<AlignedCodonFrame> mappings)
+          SequenceI sequence, List<AlignedCodonFrame> mappings)
   {
     List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
     if (sequence == null || mappings == null)
index 533e98b..6044655 100644 (file)
@@ -248,4 +248,55 @@ public class StringUtils
     }
     return "" + separator;
   }
+  
+  /**
+   * Joins the terms of a list into a single string, with the delimiter placed
+   * between consecutive terms (that is, before each term except the first).
+   * Answers an empty string if the list is null or empty. This can be replaced
+   * with StringJoiner in Java 8.
+   * 
+   * @param terms
+   *          the strings to join
+   * @param delim
+   *          the text to insert between terms
+   * @return
+   */
+  public static String listToDelimitedString(List<String> terms,
+          String delim)
+  {
+    if (terms == null || terms.isEmpty())
+    {
+      return "";
+    }
+
+    StringBuilder joined = new StringBuilder(32);
+    String prefix = "";
+    for (String term : terms)
+    {
+      joined.append(prefix).append(term);
+      // every term after the first is preceded by the delimiter
+      prefix = delim;
+    }
+    return joined.toString();
+  }
+
+  /**
+   * Convenience method to parse a string as an integer, answering 0 (rather
+   * than throwing an exception) if the input is null, empty, or not a valid
+   * integer.
+   * 
+   * @param s
+   *          the text to parse (may be null)
+   * @return the parsed value, or 0 if the text could not be parsed
+   */
+  public static int parseInt(String s)
+  {
+    if (s == null || s.length() == 0)
+    {
+      return 0;
+    }
+    try
+    {
+      return Integer.parseInt(s);
+    } catch (NumberFormatException ignored)
+    {
+      // not a valid integer - deliberately fall back to zero
+      return 0;
+    }
+  }
 }
index c4ae76d..ab9740c 100644 (file)
@@ -65,7 +65,6 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 /**
  * base class holding visualization and analysis attributes and common logic for
@@ -837,15 +836,26 @@ public abstract class AlignmentViewport implements AlignViewportI,
 
     /*
      * A separate thread to compute cDNA consensus for a protein alignment
+     * which has mapping to cDNA
      */
     final AlignmentI al = this.getAlignment();
     if (!al.isNucleotide() && al.getCodonFrames() != null
             && !al.getCodonFrames().isEmpty())
     {
-      if (calculator
-              .getRegisteredWorkersOfClass(ComplementConsensusThread.class) == null)
+      /*
+       * fudge - check first mapping is protein-to-nucleotide
+       * (we don't want to do this for protein-to-protein)
+       */
+      AlignedCodonFrame mapping = al.getCodonFrames().iterator().next();
+      // TODO hold mapping type e.g. dna-to-protein in AlignedCodonFrame?
+      if (mapping.getdnaToProt()[0].getFromRatio() == 3)
       {
-        calculator.registerWorker(new ComplementConsensusThread(this, ap));
+        if (calculator
+                .getRegisteredWorkersOfClass(ComplementConsensusThread.class) == null)
+        {
+          calculator
+                  .registerWorker(new ComplementConsensusThread(this, ap));
+        }
       }
     }
   }
@@ -1424,7 +1434,7 @@ public abstract class AlignmentViewport implements AlignViewportI,
 
     if (hiddenRepSequences == null)
     {
-      hiddenRepSequences = new Hashtable();
+      hiddenRepSequences = new Hashtable<SequenceI, SequenceCollectionI>();
     }
 
     hiddenRepSequences.put(repSequence, sg);
@@ -1769,14 +1779,20 @@ public abstract class AlignmentViewport implements AlignViewportI,
   {
     if (!alignment.isNucleotide())
     {
-      final Set<AlignedCodonFrame> codonMappings = alignment
+      final List<AlignedCodonFrame> codonMappings = alignment
               .getCodonFrames();
       if (codonMappings != null && !codonMappings.isEmpty())
       {
-        complementConsensus = new AlignmentAnnotation("cDNA Consensus",
-                "PID for cDNA", new Annotation[1], 0f, 100f,
-                AlignmentAnnotation.BAR_GRAPH);
-        initConsensus(complementConsensus);
+        // fudge: check mappings are not protein-to-protein
+        // TODO: nicer
+        AlignedCodonFrame mapping = codonMappings.iterator().next();
+        if (mapping.getdnaToProt()[0].getFromRatio() == 3)
+        {
+          complementConsensus = new AlignmentAnnotation("cDNA Consensus",
+                  "PID for cDNA", new Annotation[1], 0f, 100f,
+                  AlignmentAnnotation.BAR_GRAPH);
+          initConsensus(complementConsensus);
+        }
       }
     }
   }
@@ -2569,7 +2585,7 @@ public abstract class AlignmentViewport implements AlignViewportI,
     {
       return 0;
     }
-    final Set<AlignedCodonFrame> mappings = proteinAlignment
+    final List<AlignedCodonFrame> mappings = proteinAlignment
             .getCodonFrames();
 
     /*
index fe28d6c..b158448 100644 (file)
@@ -32,6 +32,7 @@ import jalview.util.MessageManager;
 import jalview.viewmodel.seqfeatures.FeatureRendererSettings;
 
 import java.util.LinkedHashSet;
+import java.util.List;
 import java.util.Set;
 
 public abstract class AWSThread extends Thread
@@ -98,6 +99,7 @@ public abstract class AWSThread extends Thread
   /**
    * generic web service job/subjob poll loop
    */
+  @Override
   public void run()
   {
     JobStateSummary jstate = null;
@@ -378,7 +380,7 @@ public abstract class AWSThread extends Thread
     WsUrl = wsurl2;
     if (alframe != null)
     {
-      Set<AlignedCodonFrame> cf = alframe.getViewport().getAlignment()
+      List<AlignedCodonFrame> cf = alframe.getViewport().getAlignment()
               .getCodonFrames();
       if (cf != null)
       {
index 473d54f..dff1b98 100644 (file)
@@ -332,16 +332,8 @@ public class DBRefFetcher implements Runnable
         // TODO: introduce multithread multisource queries and logic to remove a
         // query from other sources if any source for a database returns a
         // record
-        if (dbsource.getDbSourceProperties().containsKey(
-                DBRefSource.MULTIACC))
-        {
-          maxqlen = ((Integer) dbsource.getDbSourceProperties().get(
-                  DBRefSource.MULTIACC)).intValue();
-        }
-        else
-        {
-          maxqlen = 1;
-        }
+        maxqlen = dbsource.getMaximumQueryCount();
+
         while (queries.size() > 0 || seqIndex < currSeqs.length)
         {
           if (queries.size() > 0)
index 6a612a0..909f515 100644 (file)
  */
 package jalview.ws;
 
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.DBRefSource;
-import jalview.datamodel.SequenceI;
+import jalview.ext.ensembl.EnsemblCdna;
+import jalview.ext.ensembl.EnsemblCds;
+import jalview.ext.ensembl.EnsemblGene;
+import jalview.ext.ensembl.EnsemblGenome;
+import jalview.ext.ensembl.EnsemblProtein;
+import jalview.ws.dbsources.EmblCdsSource;
+import jalview.ws.dbsources.EmblSource;
+import jalview.ws.dbsources.Pdb;
+import jalview.ws.dbsources.PfamFull;
+import jalview.ws.dbsources.PfamSeed;
+import jalview.ws.dbsources.RfamFull;
+import jalview.ws.dbsources.RfamSeed;
+import jalview.ws.dbsources.Uniprot;
+import jalview.ws.dbsources.UniprotName;
 import jalview.ws.dbsources.das.api.jalviewSourceI;
 import jalview.ws.seqfetcher.ASequenceFetcher;
 import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
-import java.util.Enumeration;
 import java.util.List;
-import java.util.Vector;
 
 /**
  * This is the the concrete implementation of the sequence retrieval interface
  * and abstract class in jalview.ws.seqfetcher. This implements the run-time
- * discovery of sequence database clients, and provides a hardwired main for
- * testing all registered handlers.
+ * discovery of sequence database clients.
  * 
  */
 public class SequenceFetcher extends ASequenceFetcher
@@ -55,16 +62,21 @@ public class SequenceFetcher extends ASequenceFetcher
 
   public SequenceFetcher(boolean addDas)
   {
-    addDBRefSourceImpl(jalview.ws.dbsources.EmblSource.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.EmblCdsSouce.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.Uniprot.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.UnprotName.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.Pdb.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.PfamFull.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.PfamSeed.class);
+    addDBRefSourceImpl(EnsemblProtein.class);
+    addDBRefSourceImpl(EnsemblCds.class);
+    addDBRefSourceImpl(EnsemblGenome.class);
+    addDBRefSourceImpl(EnsemblGene.class);
+    addDBRefSourceImpl(EnsemblCdna.class);
+    addDBRefSourceImpl(EmblSource.class);
+    addDBRefSourceImpl(EmblCdsSource.class);
+    addDBRefSourceImpl(Uniprot.class);
+    addDBRefSourceImpl(UniprotName.class);
+    addDBRefSourceImpl(Pdb.class);
+    addDBRefSourceImpl(PfamFull.class);
+    addDBRefSourceImpl(PfamSeed.class);
     // ensures Seed alignment is 'default' for PFAM
-    addDBRefSourceImpl(jalview.ws.dbsources.RfamFull.class);
-    addDBRefSourceImpl(jalview.ws.dbsources.RfamSeed.class);
+    addDBRefSourceImpl(RfamFull.class);
+    addDBRefSourceImpl(RfamSeed.class);
     if (addDas)
     {
       registerDasSequenceSources();
@@ -87,7 +99,7 @@ public class SequenceFetcher extends ASequenceFetcher
       {
         // Skip the alignment databases for the moment - they're not useful for
         // verifying a single sequence against its reference source
-        if (dbs.isA(DBRefSource.ALIGNMENTDB))
+        if (dbs.isAlignmentSource())
         {
           skip = true;
         }
@@ -150,283 +162,6 @@ public class SequenceFetcher extends ASequenceFetcher
   }
 
   /**
-   * return plaintext databse list suitable for using in a GUI element
-   */
-  public String[] _getOrderedSupportedSources()
-  {
-    String[] srcs = this.getSupportedDb();
-    ArrayList dassrc = new ArrayList(), nondas = new ArrayList();
-    for (int i = 0; i < srcs.length; i++)
-    {
-      for (DbSourceProxy dbs : getSourceProxy(srcs[i]))
-      {
-        String nm = dbs.getDbName();
-        if (getSourceProxy(srcs[i]) instanceof jalview.ws.dbsources.das.datamodel.DasSequenceSource)
-        {
-          if (nm.startsWith("das:"))
-          {
-            nm = nm.substring(4);
-          }
-          dassrc.add(new String[] { srcs[i], nm.toUpperCase() });
-        }
-        else
-        {
-          nondas.add(new String[] { srcs[i], nm.toUpperCase() });
-        }
-      }
-    }
-    Object[] sorted = nondas.toArray();
-    String[] tosort = new String[sorted.length];
-    nondas.clear();
-    for (int j = 0; j < sorted.length; j++)
-    {
-      tosort[j] = ((String[]) sorted[j])[1];
-    }
-    jalview.util.QuickSort.sort(tosort, sorted);
-    int i = 0;
-    // construct array with all sources listed
-    srcs = new String[sorted.length + dassrc.size()];
-    for (int j = sorted.length - 1; j >= 0; j--, i++)
-    {
-      srcs[i] = ((String[]) sorted[j])[0];
-      sorted[j] = null;
-    }
-
-    sorted = dassrc.toArray();
-    tosort = new String[sorted.length];
-    dassrc.clear();
-    for (int j = 0; j < sorted.length; j++)
-    {
-      tosort[j] = ((String[]) sorted[j])[1];
-    }
-    jalview.util.QuickSort.sort(tosort, sorted);
-    for (int j = sorted.length - 1; j >= 0; j--, i++)
-    {
-      srcs[i] = ((String[]) sorted[j])[0];
-      sorted[j] = null;
-    }
-    return srcs;
-  }
-
-  /**
-   * simple run method to test dbsources.
-   * 
-   * @param argv
-   */
-  public static void main(String[] argv)
-  {
-    AlignmentI ds = null;
-    Vector noProds = new Vector();
-    String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
-            + "With no arguments, all DbSources will be queried with their test Accession number.\n"
-            + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
-            + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
-            + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
-    boolean withDas = true;
-    if (argv != null && argv.length > 0
-            && argv[0].toLowerCase().startsWith("-nodas"))
-    {
-      withDas = false;
-      String targs[] = new String[argv.length - 1];
-      System.arraycopy(argv, 1, targs, 0, targs.length);
-      argv = targs;
-    }
-    if (argv != null && argv.length > 0)
-    {
-      List<DbSourceProxy> sps = new SequenceFetcher(withDas)
-              .getSourceProxy(argv[0]);
-
-      if (sps != null)
-      {
-        for (DbSourceProxy sp : sps)
-        {
-          AlignmentI al = null;
-          try
-          {
-            al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
-                    .getTestQuery());
-          } catch (Exception e)
-          {
-            e.printStackTrace();
-            System.err.println("Error when retrieving "
-                    + (argv.length > 1 ? argv[1] : sp.getTestQuery())
-                    + " from " + argv[0] + "\nUsage: " + usage);
-          }
-          SequenceI[] prod = al.getSequencesArray();
-          if (al != null)
-          {
-            for (int p = 0; p < prod.length; p++)
-            {
-              System.out.println("Prod " + p + ": "
-                      + prod[p].getDisplayId(true) + " : "
-                      + prod[p].getDescription());
-            }
-          }
-        }
-        return;
-      }
-      else
-      {
-        System.err.println("Can't resolve " + argv[0]
-                + " as a database name. Allowed values are :\n"
-                + new SequenceFetcher().getSupportedDb());
-      }
-      System.out.println(usage);
-      return;
-    }
-    ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
-    String[] dbSources = sfetcher.getSupportedDb();
-    for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
-    {
-      String db = dbSources[dbsource];
-      // skip me
-      if (db.equals(DBRefSource.PDB))
-      {
-        continue;
-      }
-      for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
-      {
-        System.out.println("Source: " + sp.getDbName() + " (" + db
-                + "): retrieving test:" + sp.getTestQuery());
-        AlignmentI al = null;
-        try
-        {
-          al = sp.getSequenceRecords(sp.getTestQuery());
-          if (al != null && al.getHeight() > 0
-                  && sp.getDbSourceProperties() != null)
-          {
-            boolean dna = sp.getDbSourceProperties().containsKey(
-                    DBRefSource.DNACODINGSEQDB)
-                    || sp.getDbSourceProperties().containsKey(
-                            DBRefSource.DNASEQDB)
-                    || sp.getDbSourceProperties().containsKey(
-                            DBRefSource.CODINGSEQDB);
-            // try and find products
-            String types[] = jalview.analysis.CrossRef
-                    .findSequenceXrefTypes(dna, al.getSequencesArray());
-            if (types != null)
-            {
-              System.out.println("Xref Types for: "
-                      + (dna ? "dna" : "prot"));
-              for (int t = 0; t < types.length; t++)
-              {
-                System.out.println("Type: " + types[t]);
-                SequenceI[] prod = jalview.analysis.CrossRef
-                        .findXrefSequences(al.getSequencesArray(), dna,
-                                types[t]).getSequencesArray();
-                System.out.println("Found "
-                        + ((prod == null) ? "no" : "" + prod.length)
-                        + " products");
-                if (prod != null)
-                {
-                  for (int p = 0; p < prod.length; p++)
-                  {
-                    System.out.println("Prod " + p + ": "
-                            + prod[p].getDisplayId(true));
-                  }
-                }
-              }
-            }
-            else
-            {
-              noProds.addElement((dna ? new Object[] { al, al }
-                      : new Object[] { al }));
-            }
-
-          }
-        } catch (Exception ex)
-        {
-          System.out.println("ERROR:Failed to retrieve test query.");
-          ex.printStackTrace(System.out);
-        }
-
-        if (al == null)
-        {
-          System.out.println("ERROR:No alignment retrieved.");
-          StringBuffer raw = sp.getRawRecords();
-          if (raw != null)
-          {
-            System.out.println(raw.toString());
-          }
-          else
-          {
-            System.out.println("ERROR:No Raw results.");
-          }
-        }
-        else
-        {
-          System.out.println("Retrieved " + al.getHeight() + " sequences.");
-          for (int s = 0; s < al.getHeight(); s++)
-          {
-            SequenceI sq = al.getSequenceAt(s);
-            while (sq.getDatasetSequence() != null)
-            {
-              sq = sq.getDatasetSequence();
-
-            }
-            if (ds == null)
-            {
-              ds = new Alignment(new SequenceI[] { sq });
-
-            }
-            else
-            {
-              ds.addSequence(sq);
-            }
-          }
-        }
-        System.out.flush();
-        System.err.flush();
-
-      }
-      if (noProds.size() > 0)
-      {
-        Enumeration ts = noProds.elements();
-        while (ts.hasMoreElements())
-
-        {
-          Object[] typeSq = (Object[]) ts.nextElement();
-          boolean dna = (typeSq.length > 1);
-          AlignmentI al = (AlignmentI) typeSq[0];
-          System.out.println("Trying getProducts for "
-                  + al.getSequenceAt(0).getDisplayId(true));
-          System.out.println("Search DS Xref for: "
-                  + (dna ? "dna" : "prot"));
-          // have a bash at finding the products amongst all the retrieved
-          // sequences.
-          SequenceI[] seqs = al.getSequencesArray();
-          Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
-                  seqs, dna, null, ds);
-          System.out.println("Found "
-                  + ((prodal == null) ? "no" : "" + prodal.getHeight())
-                  + " products");
-          if (prodal != null)
-          {
-            SequenceI[] prod = prodal.getSequencesArray(); // note
-            // should
-            // test
-            // rather
-            // than
-            // throw
-            // away
-            // codon
-            // mapping
-            // (if
-            // present)
-            for (int p = 0; p < prod.length; p++)
-            {
-              System.out.println("Prod " + p + ": "
-                      + prod[p].getDisplayId(true));
-            }
-          }
-        }
-
-      }
-
-    }
-  }
-
-  /**
    * query the currently defined DAS source registry for sequence sources and
    * add a DasSequenceSource instance for each source to the SequenceFetcher
    * source list.
similarity index 89%
rename from src/jalview/ws/dbsources/EmblCdsSouce.java
rename to src/jalview/ws/dbsources/EmblCdsSource.java
index e5fbd6c..a73af61 100644 (file)
@@ -22,40 +22,43 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
-public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
+public class EmblCdsSource extends EmblXmlSource
 {
 
-  public EmblCdsSouce()
+  public EmblCdsSource()
   {
     super();
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
   }
 
+  @Override
   public String getAccessionSeparator()
   {
     return null;
   }
 
+  @Override
   public Regex getAccessionValidator()
   {
-    return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+    return new Regex("^[A-Z]+[0-9]+");
   }
 
+  @Override
   public String getDbSource()
   {
     return DBRefSource.EMBLCDS;
   }
 
+  @Override
   public String getDbVersion()
   {
     return "0"; // TODO : this is dynamically set for a returned record - not
     // tied to proxy
   }
 
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     if (queries.indexOf(".") > -1)
@@ -65,6 +68,7 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
     return getEmblSequenceRecords(DBRefSource.EMBLCDS, queries);
   }
 
+  @Override
   public boolean isValidReference(String accession)
   {
     // most embl CDS refs look like ..
@@ -76,11 +80,13 @@ public class EmblCdsSouce extends EmblXmlSource implements DbSourceProxy
   /**
    * cDNA for LDHA_CHICK swissprot sequence
    */
+  @Override
   public String getTestQuery()
   {
     return "CAA37824";
   }
 
+  @Override
   public String getDbName()
   {
     return "EMBL (CDS)";
index 893ab41..6bbe2e1 100644 (file)
@@ -22,7 +22,6 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
@@ -30,13 +29,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class EmblSource extends EmblXmlSource implements DbSourceProxy
+public class EmblSource extends EmblXmlSource
 {
 
   public EmblSource()
   {
-    addDbSourceProperty(DBRefSource.DNASEQDB);
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
+    super();
   }
 
   /*
@@ -44,6 +42,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -55,9 +54,10 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
-    return new com.stevesoft.pat.Regex("^[A-Z]+[0-9]+");
+    return new Regex("^[A-Z]+[0-9]+");
   }
 
   /*
@@ -65,6 +65,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.EMBL;
@@ -75,6 +76,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     // TODO Auto-generated method stub
@@ -86,6 +88,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     return getEmblSequenceRecords(DBRefSource.EMBL, queries);
@@ -96,6 +99,7 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     // most embl refs look like ..
@@ -108,11 +112,13 @@ public class EmblSource extends EmblXmlSource implements DbSourceProxy
   /**
    * return LHD_CHICK coding gene
    */
+  @Override
   public String getTestQuery()
   {
     return "X53828";
   }
 
+  @Override
   public String getDbName()
   {
     return "EMBL"; // getDbSource();
index 20da45c..66ebe1b 100644 (file)
@@ -155,4 +155,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     return al;
   }
 
+  @Override
+  public boolean isDnaCoding()
+  {
+    return true;
+  }
+
 }
index 116962b..ce21ad0 100644 (file)
@@ -22,7 +22,6 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefSource;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import com.stevesoft.pat.Regex;
 
@@ -32,13 +31,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
+public class GeneDbSource extends EmblXmlSource
 {
 
   public GeneDbSource()
   {
-    addDbSourceProperty(DBRefSource.DNASEQDB);
-    addDbSourceProperty(DBRefSource.CODINGSEQDB);
+    super();
   }
 
   /*
@@ -46,6 +44,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -57,6 +56,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -68,6 +68,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.GENEDB;
@@ -78,6 +79,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     // TODO Auto-generated method stub
@@ -89,6 +91,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // query of form
@@ -102,6 +105,7 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     // TODO Auto-generated method stub
@@ -111,11 +115,13 @@ public class GeneDbSource extends EmblXmlSource implements DbSourceProxy
   /**
    * return T.Brucei Mannosyl-Transferase TbPIG-M
    */
+  @Override
   public String getTestQuery()
   {
     return "Tb927.6.3300";
   }
 
+  @Override
   public String getDbName()
   {
     return "GeneDB"; // getDbSource();
index 7f8c76c..3fd7541 100644 (file)
@@ -29,7 +29,6 @@ import jalview.datamodel.SequenceI;
 import jalview.io.FormatAdapter;
 import jalview.util.MessageManager;
 import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -41,12 +40,11 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
+public class Pdb extends EbiFileRetrievedProxy
 {
   public Pdb()
   {
     super();
-    addDbSourceProperty(DBRefSource.PROTSEQDB);
   }
 
   /*
@@ -54,6 +52,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -65,6 +64,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     return new Regex("([1-9][0-9A-Za-z]{3}):?([ _A-Za-z0-9]?)");
@@ -75,6 +75,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
     return DBRefSource.PDB;
@@ -85,6 +86,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getDbVersion()
    */
+  @Override
   public String getDbVersion()
   {
     return "0";
@@ -95,6 +97,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     AlignmentI pdbAlignment = null;
@@ -219,6 +222,7 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     Regex r = getAccessionValidator();
@@ -228,11 +232,13 @@ public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
   /**
    * obtain human glyoxalase chain A sequence
    */
+  @Override
   public String getTestQuery()
   {
     return "1QIPA";
   }
 
+  @Override
   public String getDbName()
   {
     return "PDB"; // getDbSource();
index 0211bb1..4f081ee 100644 (file)
@@ -22,7 +22,8 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
+import jalview.io.FormatAdapter;
 
 import com.stevesoft.pat.Regex;
 
@@ -34,15 +35,12 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-abstract public class Pfam extends Xfam implements DbSourceProxy
+abstract public class Pfam extends Xfam
 {
 
   public Pfam()
   {
     super();
-    // all extensions of this PFAM source base class are DOMAINDB sources
-    addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
-    addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
   }
 
   /*
@@ -50,6 +48,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -61,6 +60,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -111,22 +111,23 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
    */
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // TODO: this is not a perfect implementation. We need to be able to add
     // individual references to each sequence in each family alignment that's
     // retrieved.
     startQuery();
-    AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
+    AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
             + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
             "STH");
     for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
     {
       rcds.getSequenceAt(s).addDBRef(
-              new DBRefEntry(jalview.datamodel.DBRefSource.PFAM,
+new DBRefEntry(DBRefSource.PFAM,
               // getDbSource(),
                       getDbVersion(), queries.trim().toUpperCase()));
-      if (!getDbSource().equals(jalview.datamodel.DBRefSource.PFAM))
+      if (!getDbSource().equals(DBRefSource.PFAM))
       { // add the specific ref too
         rcds.getSequenceAt(s).addDBRef(
                 new DBRefEntry(getDbSource(), getDbVersion(), queries
@@ -142,6 +143,7 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     return accession.indexOf("PF") == 0;
@@ -151,9 +153,10 @@ abstract public class Pfam extends Xfam implements DbSourceProxy
    * public String getDbName() { return "PFAM"; // getDbSource(); }
    */
 
+  @Override
   public String getXfamSource()
   {
-    return jalview.datamodel.DBRefSource.PFAM;
+    return DBRefSource.PFAM;
   }
 
 }
index 0490291..4f5b8f5 100644 (file)
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * flyweight class specifying retrieval of Full family alignments from PFAM
  * 
  */
-public class PfamFull extends Pfam implements DbSourceProxy
+public class PfamFull extends Pfam
 {
   public PfamFull()
   {
@@ -38,6 +37,7 @@ public class PfamFull extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Pfam#getPFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=full&format=stockholm&order=t&case=l&gaps=default&entry=";
@@ -48,21 +48,25 @@ public class PfamFull extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "PFAM (Full)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     return "PF03760";
   }
 
+  @Override
   public String getDbVersion()
   {
     return null;
index 2ea75af..be8f044 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * flyweight class specifying retrieval of Seed alignments from PFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author JimP
  * 
  */
-public class PfamSeed extends Pfam implements DbSourceProxy
+public class PfamSeed extends Pfam
 {
   public PfamSeed()
   {
@@ -40,6 +39,7 @@ public class PfamSeed extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Pfam#getPFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://pfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&format=stockholm&order=t&case=l&gaps=default&entry=";
@@ -50,16 +50,19 @@ public class PfamSeed extends Pfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "PFAM (Seed)";
   }
 
+  @Override
   public String getDbSource()
   {
     return jalview.datamodel.DBRefSource.PFAM; // archetype source
   }
 
+  @Override
   public String getTestQuery()
   {
     return "PF03760";
index b07b8ea..97f73d0 100644 (file)
@@ -20,7 +20,7 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
+import jalview.datamodel.DBRefSource;
 
 import com.stevesoft.pat.Regex;
 
@@ -29,15 +29,12 @@ import com.stevesoft.pat.Regex;
  * 
  * @author Lauren Michelle Lui
  */
-abstract public class Rfam extends Xfam implements DbSourceProxy
+abstract public class Rfam extends Xfam
 {
 
   public Rfam()
   {
     super();
-    // all extensions of this RFAM source base class are DOMAINDB sources
-    addDbSourceProperty(jalview.datamodel.DBRefSource.DOMAINDB);
-    addDbSourceProperty(jalview.datamodel.DBRefSource.ALIGNMENTDB);
   }
 
   /*
@@ -46,6 +43,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * @see jalview.ws.DbSourceProxy#getAccessionSeparator() Left here for
    * consistency with Pfam class
    */
+  @Override
   public String getAccessionSeparator()
   {
     // TODO Auto-generated method stub
@@ -57,6 +55,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#getAccessionValidator() * Left here for
    */
+  @Override
   public Regex getAccessionValidator()
   {
     // TODO Auto-generated method stub
@@ -100,6 +99,7 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
    */
+  @Override
   public boolean isValidReference(String accession)
   {
     return accession.indexOf("RF") == 0;
@@ -110,9 +110,10 @@ abstract public class Rfam extends Xfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Xfam#getXfamSource()
    */
+  @Override
   public String getXfamSource()
   {
-    return jalview.datamodel.DBRefSource.RFAM;
+    return DBRefSource.RFAM;
   }
 
 }
index 74f4ec6..e1e9e9a 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * Flyweight class specifying retrieval of Full family alignments from RFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author Lauren Michelle Lui
  * 
  */
-public class RfamFull extends Rfam implements DbSourceProxy
+public class RfamFull extends Rfam
 {
   public RfamFull()
   {
@@ -40,6 +39,7 @@ public class RfamFull extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Rfam#getXFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=full&nseLabels=0&format=stockholm&acc=";
@@ -50,16 +50,19 @@ public class RfamFull extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "RFAM (Full)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     // Can be retrieved from http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
@@ -68,6 +71,7 @@ public class RfamFull extends Rfam implements DbSourceProxy
     return "RF00014";
   }
 
+  @Override
   public String getDbVersion()
   {
     return null;
index dd2b12f..2850fd5 100644 (file)
@@ -20,7 +20,6 @@
  */
 package jalview.ws.dbsources;
 
-import jalview.ws.seqfetcher.DbSourceProxy;
 
 /**
  * Flyweight class specifying retrieval of Seed family alignments from RFAM
@@ -28,7 +27,7 @@ import jalview.ws.seqfetcher.DbSourceProxy;
  * @author Lauren Michelle Lui
  * 
  */
-public class RfamSeed extends Rfam implements DbSourceProxy
+public class RfamSeed extends Rfam
 {
   public RfamSeed()
   {
@@ -40,6 +39,7 @@ public class RfamSeed extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.dbsources.Rfam#getRFAMURL()
    */
+  @Override
   protected String getXFAMURL()
   {
     return "http://rfam.sanger.ac.uk/family/alignment/download/format?alnType=seed&nseLabels=0&format=stockholm&acc=";
@@ -52,21 +52,25 @@ public class RfamSeed extends Rfam implements DbSourceProxy
    * 
    * @see jalview.ws.seqfetcher.DbSourceProxy#getDbName()
    */
+  @Override
   public String getDbName()
   {
     return "RFAM (Seed)";
   }
 
+  @Override
   public String getDbSource()
   {
     return getDbName(); // so we have unique DbSource string.
   }
 
+  @Override
   public String getTestQuery()
   {
     return "RF00014";
   } // http://rfam.janelia.org/cgi-bin/getdesc?acc=RF00014
 
+  @Override
   public String getDbVersion()
   {
     return null;
index 843828b..02da009 100644 (file)
@@ -31,15 +31,16 @@ import jalview.datamodel.SequenceI;
 import jalview.datamodel.UniprotEntry;
 import jalview.datamodel.UniprotFile;
 import jalview.ws.ebi.EBIFetchClient;
-import jalview.ws.seqfetcher.DbSourceProxy;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 import java.io.File;
 import java.io.FileReader;
 import java.io.Reader;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Vector;
 
+import org.exolab.castor.mapping.Mapping;
 import org.exolab.castor.xml.Unmarshaller;
 
 import com.stevesoft.pat.Regex;
@@ -48,12 +49,14 @@ import com.stevesoft.pat.Regex;
  * @author JimP
  * 
  */
-public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
+public class Uniprot extends DbSourceProxyImpl
 {
-
   private static final String BAR_DELIMITER = "|";
 
-  private static org.exolab.castor.mapping.Mapping map;
+  /*
+   * Castor mapping loaded from uniprot_mapping.xml
+   */
+  private static Mapping map;
 
   /**
    * Constructor
@@ -61,8 +64,6 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
   public Uniprot()
   {
     super();
-    addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
-    addDbSourceProperty(DBRefSource.PROTSEQDB);
   }
 
   /*
@@ -125,9 +126,8 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
       if (map == null)
       {
         // 1. Load the mapping information from the file
-        map = new org.exolab.castor.mapping.Mapping(uni.getClass()
-                .getClassLoader());
-        java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
+        map = new Mapping(uni.getClass().getClassLoader());
+        URL url = getClass().getResource("/uniprot_mapping.xml");
         map.loadMapping(url);
       }
 
similarity index 88%
rename from src/jalview/ws/dbsources/UnprotName.java
rename to src/jalview/ws/dbsources/UniprotName.java
index 5dbc960..fa693c0 100644 (file)
@@ -20,6 +20,8 @@
  */
 package jalview.ws.dbsources;
 
+import jalview.datamodel.DBRefSource;
+
 /**
  * Canonical Uniprot fetcher instance specifically retrieving UP_NAME
  * references.
@@ -27,8 +29,7 @@ package jalview.ws.dbsources;
  * @author JimP
  * 
  */
-public class UnprotName extends Uniprot implements
-        jalview.ws.seqfetcher.DbSourceProxy
+public class UniprotName extends Uniprot
 {
 
   /*
@@ -36,9 +37,10 @@ public class UnprotName extends Uniprot implements
    * 
    * @see jalview.ws.dbsources.Uniprot#getDbSource()
    */
+  @Override
   public String getDbSource()
   {
-    return jalview.datamodel.DBRefSource.UP_NAME;
+    return DBRefSource.UP_NAME;
   }
 
 }
index c392ce6..26c9997 100644 (file)
@@ -22,6 +22,7 @@ package jalview.ws.dbsources;
 
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
+import jalview.io.FormatAdapter;
 import jalview.ws.seqfetcher.DbSourceProxyImpl;
 
 /**
@@ -40,10 +41,12 @@ public abstract class Xfam extends DbSourceProxyImpl
 
   protected abstract String getXFAMURL();
 
+  @Override
   public abstract String getDbVersion();
 
   abstract String getXfamSource();
 
+  @Override
   public AlignmentI getSequenceRecords(String queries) throws Exception
   {
     // TODO: this is not a perfect implementation. We need to be able to add
@@ -51,9 +54,8 @@ public abstract class Xfam extends DbSourceProxyImpl
     // retrieved.
     startQuery();
     // TODO: trap HTTP 404 exceptions and return null
-    AlignmentI rcds = new jalview.io.FormatAdapter().readFile(getXFAMURL()
-            + queries.trim().toUpperCase(), jalview.io.FormatAdapter.URL,
-            "STH");
+    AlignmentI rcds = new FormatAdapter().readFile(getXFAMURL()
+            + queries.trim().toUpperCase(), FormatAdapter.URL, "STH");
     for (int s = 0, sNum = rcds.getHeight(); s < sNum; s++)
     {
       rcds.getSequenceAt(s).addDBRef(new DBRefEntry(getXfamSource(),
@@ -70,4 +72,13 @@ public abstract class Xfam extends DbSourceProxyImpl
     return rcds;
   }
 
+  /**
+   * Pfam and Rfam provide alignments
+   */
+  @Override
+  public boolean isAlignmentSource()
+  {
+    return true;
+  }
+
 }
index 8c7768d..8299e3c 100644 (file)
@@ -123,7 +123,7 @@ class JPredThread extends JWS1Thread implements WSClientI
         jalview.bin.Cache.log.debug("Getting associated alignment.");
         // we ignore the returned alignment if we only predicted on a single
         // sequence
-        String format = new jalview.io.IdentifyFile().Identify(
+        String format = new jalview.io.IdentifyFile().identify(
                 result.getAligfile(), "Paste");
 
         if (jalview.io.FormatAdapter.isValidFormat(format))
index a28494c..b2e9b35 100644 (file)
@@ -152,6 +152,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
      * 
      * @return true if getAlignment will return a valid alignment result.
      */
+    @Override
     public boolean hasResults()
     {
       if (subjobComplete
@@ -170,7 +171,8 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
      * 
      * @return null or { Alignment(+features and annotation), NewickFile)}
      */
-    public Object[] getAlignment(Alignment dataset, Map featureColours)
+    public Object[] getAlignment(Alignment dataset,
+            Map<String, Object> featureColours)
     {
 
       if (result != null && result.isFinished())
@@ -287,6 +289,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
      * 
      * @return boolean true if job can be submitted.
      */
+    @Override
     public boolean hasValidInput()
     {
       if (seqs.getSeqs() != null)
@@ -373,11 +376,13 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
     }
   }
 
+  @Override
   public boolean isCancellable()
   {
     return true;
   }
 
+  @Override
   public void cancelJob()
   {
     if (!jobComplete && jobs != null)
@@ -444,12 +449,14 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
     }
   }
 
+  @Override
   public void pollJob(AWsJob job) throws Exception
   {
     ((SeqSearchWSJob) job).result = server.getResult(((SeqSearchWSJob) job)
             .getJobId());
   }
 
+  @Override
   public void StartJob(AWsJob job)
   {
     if (!(job instanceof SeqSearchWSJob))
@@ -536,6 +543,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
     return msa;
   }
 
+  @Override
   public void parseResult()
   {
     int results = 0; // number of result sets received
@@ -579,6 +587,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
       wsInfo.showResultsNewFrame
               .addActionListener(new java.awt.event.ActionListener()
               {
+                @Override
                 public void actionPerformed(java.awt.event.ActionEvent evt)
                 {
                   displayResults(true);
@@ -587,6 +596,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
       wsInfo.mergeResults
               .addActionListener(new java.awt.event.ActionListener()
               {
+                @Override
                 public void actionPerformed(java.awt.event.ActionEvent evt)
                 {
                   displayResults(false);
@@ -612,7 +622,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
     // NewickFile nf[] = new NewickFile[jobs.length];
     for (int j = 0; j < jobs.length; j++)
     {
-      Map featureColours = new HashMap();
+      Map<String, Object> featureColours = new HashMap<String, Object>();
       Alignment al = null;
       NewickFile nf = null;
       if (jobs[j].hasResults())
@@ -664,6 +674,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI
     }
   }
 
+  @Override
   public boolean canMergeResults()
   {
     return false;
index 0fe7275..7d88414 100644 (file)
@@ -20,6 +20,7 @@
  */
 package jalview.ws.seqfetcher;
 
+import jalview.bin.Cache;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.SequenceI;
@@ -27,10 +28,11 @@ import jalview.util.DBRefUtils;
 import jalview.util.MessageManager;
 
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Hashtable;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Stack;
@@ -39,60 +41,93 @@ import java.util.Vector;
 public class ASequenceFetcher
 {
 
-  /**
+  /*
    * set of databases we can retrieve entries from
    */
-  protected Hashtable<String, Map<String, DbSourceProxy>> FETCHABLEDBS;
+  protected Hashtable<String, Map<String, DbSourceProxy>> fetchableDbs;
+
+  /*
+   * comparator to sort by tier (0/1/2) and name
+   */
+  private Comparator<DbSourceProxy> proxyComparator;
 
+  /**
+   * Constructor
+   */
   public ASequenceFetcher()
   {
     super();
+
+    /*
+     * comparator to sort proxies by tier and name
+     */
+    proxyComparator = new Comparator<DbSourceProxy>()
+    {
+      @Override
+      public int compare(DbSourceProxy o1, DbSourceProxy o2)
+      {
+        /*
+         * Tier 0 precedes 1 precedes 2
+         */
+        int compared = Integer.compare(o1.getTier(), o2.getTier());
+        if (compared == 0)
+        {
+          // defend against NullPointer - should never happen
+          String o1Name = o1.getDbName();
+          String o2Name = o2.getDbName();
+          if (o1Name != null && o2Name != null)
+          {
+            compared = o1Name.compareToIgnoreCase(o2Name);
+          }
+        }
+        return compared;
+      }
+    };
   }
 
   /**
-   * get list of supported Databases
+   * get array of supported Databases
    * 
    * @return database source string for each database - only the latest version
    *         of a source db is bound to each source.
    */
   public String[] getSupportedDb()
   {
-    if (FETCHABLEDBS == null)
+    if (fetchableDbs == null)
     {
       return null;
     }
-    String[] sf = new String[FETCHABLEDBS.size()];
-    Enumeration e = FETCHABLEDBS.keys();
-    int i = 0;
-    while (e.hasMoreElements())
-    {
-      sf[i++] = (String) e.nextElement();
-    }
-    ;
+    String[] sf = fetchableDbs.keySet().toArray(
+            new String[fetchableDbs.size()]);
     return sf;
   }
 
   public boolean isFetchable(String source)
   {
-    Enumeration e = FETCHABLEDBS.keys();
-    while (e.hasMoreElements())
+    for (String db : fetchableDbs.keySet())
     {
-      String db = (String) e.nextElement();
-      if (source.compareToIgnoreCase(db) == 0)
+      if (source.equalsIgnoreCase(db))
       {
         return true;
       }
     }
-    jalview.bin.Cache.log.warn("isFetchable doesn't know about '" + source
+    Cache.log.warn("isFetchable doesn't know about '" + source
             + "'");
     return false;
   }
 
-  public SequenceI[] getSequences(jalview.datamodel.DBRefEntry[] refs)
+  /**
+   * Fetch sequences for the given cross-references
+   * 
+   * @param refs
+   * @param dna
+   *          if true, only fetch from nucleotide data sources, else peptide
+   * @return
+   */
+  public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna)
   {
-    SequenceI[] ret = null;
-    Vector<SequenceI> rseqs = new Vector();
-    Hashtable<String, List<String>> queries = new Hashtable();
+    Vector<SequenceI> rseqs = new Vector<SequenceI>();
+    Hashtable<String, List<String>> queries = new Hashtable<String, List<String>>();
     for (int r = 0; r < refs.length; r++)
     {
       if (!queries.containsKey(refs[r].getSource()))
@@ -118,22 +153,22 @@ public class ASequenceFetcher
                 "Don't know how to fetch from this database :" + db));
         continue;
       }
-      Iterator<DbSourceProxy> fetchers = getSourceProxy(db).iterator();
+
       Stack<String> queriesLeft = new Stack<String>();
-      // List<String> queriesFailed = new ArrayList<String>();
       queriesLeft.addAll(query);
-      while (fetchers.hasNext())
+
+      for (DbSourceProxy fetcher : getSourceProxy(db))
       {
         List<String> queriesMade = new ArrayList<String>();
-        HashSet queriesFound = new HashSet<String>();
+        HashSet<String> queriesFound = new HashSet<String>();
         try
         {
-          DbSourceProxy fetcher = fetchers.next();
-          boolean doMultiple = fetcher.getAccessionSeparator() != null; // No
-          // separator
-          // - no
-          // Multiple
-          // Queries
+          if (fetcher.isDnaCoding() != dna)
+          {
+            continue; // wrong sort of data
+          }
+          boolean doMultiple = fetcher.getAccessionSeparator() != null;
+          // No separator - no Multiple Queries
           while (!queriesLeft.isEmpty())
           {
             StringBuffer qsb = new StringBuffer();
@@ -225,19 +260,19 @@ public class ASequenceFetcher
         }
       }
     }
+
+    SequenceI[] result = null;
     if (rseqs.size() > 0)
     {
-      ret = new SequenceI[rseqs.size()];
-      Enumeration sqs = rseqs.elements();
+      result = new SequenceI[rseqs.size()];
       int si = 0;
-      while (sqs.hasMoreElements())
+      for (SequenceI s : rseqs)
       {
-        SequenceI s = (SequenceI) sqs.nextElement();
-        ret[si++] = s;
+        result[si++] = s;
         s.updatePDBIds();
       }
     }
-    return ret;
+    return result;
   }
 
   public void reportStdError(String db, List<String> queriesMade,
@@ -261,50 +296,32 @@ public class ASequenceFetcher
   }
 
   /**
-   * Retrieve an instance of the proxy for the given source
+   * Returns a list of proxies for the given source
    * 
    * @param db
    *          database source string TODO: add version string/wildcard for
    *          retrieval of specific DB source/version combinations.
-   * @return an instance of DbSourceProxy for that db.
+   * @return a list of DbSourceProxy for the db
    */
   public List<DbSourceProxy> getSourceProxy(String db)
   {
-    List<DbSourceProxy> dbs;
-    Map<String, DbSourceProxy> dblist = FETCHABLEDBS.get(db);
+    db = DBRefUtils.getCanonicalName(db);
+    Map<String, DbSourceProxy> dblist = fetchableDbs.get(db);
     if (dblist == null)
     {
       return new ArrayList<DbSourceProxy>();
     }
-    ;
-    if (dblist.size() > 1)
-    {
-      DbSourceProxy[] l = dblist.values().toArray(new DbSourceProxy[0]);
-      int i = 0;
-      String[] nm = new String[l.length];
-      // make sure standard dbs appear first, followed by reference das sources,
-      // followed by anything else.
-      for (DbSourceProxy s : l)
-      {
-        nm[i++] = "" + s.getTier() + s.getDbName().toLowerCase();
-      }
-      jalview.util.QuickSort.sort(nm, l);
-      dbs = new ArrayList<DbSourceProxy>();
-      for (i = l.length - 1; i >= 0; i--)
-      {
-        dbs.add(l[i]);
-      }
-    }
-    else
-    {
-      dbs = new ArrayList<DbSourceProxy>(dblist.values());
-    }
+
+    /*
+     * sort so that primary sources precede secondary
+     */
+    List<DbSourceProxy> dbs = new ArrayList<DbSourceProxy>(dblist.values());
+    Collections.sort(dbs, proxyComparator);
     return dbs;
   }
 
   /**
-   * constructs and instance of the proxy and registers it as a valid
-   * dbrefsource
+   * constructs an instance of the proxy and registers it as a valid dbrefsource
    * 
    * @param dbSourceProxy
    *          reference for class implementing
@@ -312,7 +329,7 @@ public class ASequenceFetcher
    */
   protected void addDBRefSourceImpl(
           Class<? extends DbSourceProxy> dbSourceProxy)
-          throws java.lang.IllegalArgumentException
+          throws IllegalArgumentException
   {
     DbSourceProxy proxy = null;
     try
@@ -343,15 +360,15 @@ public class ASequenceFetcher
   {
     if (proxy != null)
     {
-      if (FETCHABLEDBS == null)
+      if (fetchableDbs == null)
       {
-        FETCHABLEDBS = new Hashtable<String, Map<String, DbSourceProxy>>();
+        fetchableDbs = new Hashtable<String, Map<String, DbSourceProxy>>();
       }
-      Map<String, DbSourceProxy> slist = FETCHABLEDBS.get(proxy
+      Map<String, DbSourceProxy> slist = fetchableDbs.get(proxy
               .getDbSource());
       if (slist == null)
       {
-        FETCHABLEDBS.put(proxy.getDbSource(),
+        fetchableDbs.put(proxy.getDbSource(),
                 slist = new Hashtable<String, DbSourceProxy>());
       }
       slist.put(proxy.getDbName(), proxy);
@@ -359,34 +376,6 @@ public class ASequenceFetcher
   }
 
   /**
-   * test if the database handler for dbName contains the given dbProperty when
-   * a dbName resolves to a set of proxies - this method will return the result
-   * of the test for the first instance. TODO implement additional method to
-   * query all sources for a db to find one with a particular property
-   * 
-   * @param dbName
-   * @param dbProperty
-   * @return true if proxy has the given property
-   */
-  public boolean hasDbSourceProperty(String dbName, String dbProperty)
-  {
-    // TODO: decide if invalidDbName exception is thrown here.
-
-    List<DbSourceProxy> proxies = getSourceProxy(dbName);
-    if (proxies != null)
-    {
-      for (DbSourceProxy proxy : proxies)
-      {
-        if (proxy.getDbSourceProperties() != null)
-        {
-          return proxy.getDbSourceProperties().containsKey(dbProperty);
-        }
-      }
-    }
-    return false;
-  }
-
-  /**
    * select sources which are implemented by instances of the given class
    * 
    * @param class that implements DbSourceProxy
@@ -394,7 +383,7 @@ public class ASequenceFetcher
    */
   public String[] getDbInstances(Class class1)
   {
-    if (!jalview.ws.seqfetcher.DbSourceProxy.class.isAssignableFrom(class1))
+    if (!DbSourceProxy.class.isAssignableFrom(class1))
     {
       throw new Error(
               MessageManager
@@ -402,17 +391,17 @@ public class ASequenceFetcher
                               "error.implementation_error_dbinstance_must_implement_interface",
                               new String[] { class1.toString() }));
     }
-    if (FETCHABLEDBS == null)
+    if (fetchableDbs == null)
     {
       return null;
     }
     String[] sources = null;
-    Vector src = new Vector();
-    Enumeration dbs = FETCHABLEDBS.keys();
+    Vector<String> src = new Vector<String>();
+    Enumeration<String> dbs = fetchableDbs.keys();
     while (dbs.hasMoreElements())
     {
-      String dbn = (String) dbs.nextElement();
-      for (DbSourceProxy dbp : FETCHABLEDBS.get(dbn).values())
+      String dbn = dbs.nextElement();
+      for (DbSourceProxy dbp : fetchableDbs.get(dbn).values())
       {
         if (class1.isAssignableFrom(dbp.getClass()))
         {
@@ -429,7 +418,7 @@ public class ASequenceFetcher
 
   public DbSourceProxy[] getDbSourceProxyInstances(Class class1)
   {
-    ArrayList<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
+    List<DbSourceProxy> prlist = new ArrayList<DbSourceProxy>();
     for (String fetchable : getSupportedDb())
     {
       for (DbSourceProxy pr : getSourceProxy(fetchable))
index 556df1f..fba9e83 100644 (file)
@@ -22,8 +22,6 @@ package jalview.ws.seqfetcher;
 
 import jalview.datamodel.AlignmentI;
 
-import java.util.Hashtable;
-
 import com.stevesoft.pat.Regex;
 
 /**
@@ -78,15 +76,6 @@ public interface DbSourceProxy
   public Regex getAccessionValidator();
 
   /**
-   * DbSource properties hash - define the capabilities of this source Property
-   * hash methods defined in DbSourceProxyImpl. See constants in
-   * jalview.datamodel.DBRefSource for definition of properties.
-   * 
-   * @return
-   */
-  public Hashtable getDbSourceProperties();
-
-  /**
    * 
    * @return a test/example query that can be used to validate retrieval and
    *         parsing mechanisms
@@ -94,7 +83,9 @@ public interface DbSourceProxy
   public String getTestQuery();
 
   /**
-   * optionally implemented
+   * Required for sources supporting multiple query retrieval for use with the
+   * DBRefFetcher, which attempts to limit its queries with putative accession
+   * strings for a source to only those that are likely to be valid.
    * 
    * @param accession
    * @return
@@ -107,7 +98,9 @@ public interface DbSourceProxy
    * 
    * @param queries
    *          - one or more queries for database in expected form
-   * @return null if queries were successful but result was not parsable
+   * @return null if queries were successful but result was not parsable.
+   *         Otherwise, an AlignmentI object containing properly annotated data
+   *         (e.g. sequences with accessions for this datasource)
    * @throws Exception
    *           - propagated from underlying transport to database (note -
    *           exceptions are not raised if query not found in database)
@@ -129,19 +122,51 @@ public interface DbSourceProxy
   public StringBuffer getRawRecords();
 
   /**
-   * Find out more info about the source.
+   * Tier for this data source
    * 
-   * @param dbsourceproperty
-   *          - one of the database reference source properties in
-   *          jalview.datamodel.DBRefSource
-   * @return true if the source has this property
+   * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
    */
-  public boolean isA(Object dbsourceproperty);
+  public int getTier();
 
   /**
-   * Tier for this data source
+   * Extracts valid accession strings from a query string. If there is an
+   * accession id validator, returns the matched region or the first
+   * subgroup match from the matched region; else just returns the whole query.
    * 
-   * @return 0 - primary datasource, 1 - das primary source, 2 - secondary
+   * @param query
+   * @return
    */
-  public int getTier();
-}
+  String getAccessionIdFromQuery(String query);
+
+  /**
+   * Returns the maximum number of accession ids that can be queried in one
+   * request.
+   * 
+   * @return
+   */
+  public int getMaximumQueryCount();
+
+  /**
+   * Returns true if the source may provide coding DNA i.e. sequences with
+   * implicit peptide products
+   * 
+   * @return
+   */
+  public boolean isDnaCoding();
+
+  /**
+   * Answers true if the database is a source of alignments (for example, domain
+   * families)
+   * 
+   * @return
+   */
+  public boolean isAlignmentSource();
+
+  /**
+   * Returns an (optional) description of the source, suitable for display as a
+   * tooltip, or null
+   * 
+   * @return
+   */
+  public String getDescription();
+}
\ No newline at end of file
index 0785dfa..0a4d9a8 100644 (file)
@@ -24,7 +24,7 @@ import jalview.datamodel.AlignmentI;
 import jalview.io.FormatAdapter;
 import jalview.io.IdentifyFile;
 
-import java.util.Hashtable;
+import com.stevesoft.pat.Regex;
 
 /**
  * common methods for implementations of the DbSourceProxy interface.
@@ -34,50 +34,21 @@ import java.util.Hashtable;
  */
 public abstract class DbSourceProxyImpl implements DbSourceProxy
 {
-  public DbSourceProxyImpl()
-  {
-    // default constructor - do nothing probably.
-  }
-
-  private Hashtable props = null;
 
-  /*
-   * (non-Javadoc)
-   * 
-   * @see jalview.ws.DbSourceProxy#getDbSourceProperties()
-   */
-  public Hashtable getDbSourceProperties()
-  {
-    if (props == null)
-    {
-      props = new Hashtable();
-    }
-    return props;
-  }
+  boolean queryInProgress = false;
 
-  protected void addDbSourceProperty(Object propname)
-  {
-    addDbSourceProperty(propname, propname);
-  }
+  protected StringBuffer results = null;
 
-  protected void addDbSourceProperty(Object propname, Object propvalue)
+  public DbSourceProxyImpl()
   {
-    if (props == null)
-    {
-      props = new Hashtable();
-    }
-    props.put(propname, propvalue);
   }
 
-  boolean queryInProgress = false;
-
-  protected StringBuffer results = null;
-
   /*
    * (non-Javadoc)
    * 
    * @see jalview.ws.DbSourceProxy#getRawRecords()
    */
+  @Override
   public StringBuffer getRawRecords()
   {
     return results;
@@ -88,6 +59,7 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
    * 
    * @see jalview.ws.DbSourceProxy#queryInProgress()
    */
+  @Override
   public boolean queryInProgress()
   {
     return queryInProgress;
@@ -121,7 +93,7 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
   protected AlignmentI parseResult(String result) throws Exception
   {
     AlignmentI sequences = null;
-    String format = new IdentifyFile().Identify(result, "Paste");
+    String format = new IdentifyFile().identify(result, "Paste");
     if (FormatAdapter.isValidFormat(format))
     {
       sequences = new FormatAdapter().readFile(result.toString(), "Paste",
@@ -131,10 +103,54 @@ public abstract class DbSourceProxyImpl implements DbSourceProxy
   }
 
   @Override
-  public boolean isA(Object dbsourceproperty)
+  public String getAccessionIdFromQuery(String query)
+  {
+    Regex vgr = getAccessionValidator();
+    if (vgr == null)
+    {
+      return query;
+    }
+    vgr.search(query);
+    if (vgr.numSubs() > 0)
+    {
+      return (vgr.stringMatched(1));
+    }
+    else
+    {
+      return (vgr.stringMatched());
+    }
+  }
+
+  /**
+   * Default is only one accession id per query - override if more are allowed.
+   */
+  @Override
+  public int getMaximumQueryCount()
+  {
+    return 1;
+  }
+
+  /**
+   * Returns false - override to return true for DNA coding data sources
+   */
+  @Override
+  public boolean isDnaCoding()
   {
-    assert (dbsourceproperty != null);
-    return (props == null) ? false : props.containsKey(dbsourceproperty);
+    return false;
   }
 
+  /**
+   * Answers false - override as required in subclasses
+   */
+  @Override
+  public boolean isAlignmentSource()
+  {
+    return false;
+  }
+
+  @Override
+  public String getDescription()
+  {
+    return null;
+  }
 }
index 74e4940..09bd64e 100644 (file)
@@ -36,6 +36,7 @@ import jalview.datamodel.Mapping;
 import jalview.datamodel.SearchResults;
 import jalview.datamodel.SearchResults.Match;
 import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.FormatAdapter;
@@ -45,10 +46,8 @@ import jalview.util.MappingUtils;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -372,8 +371,8 @@ public class AlignmentUtilsTests
      * region). The leading gap, and the gaps between codons, are subsumed by
      * the protein alignment gap.
      */
-    checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", true, true, map,
-            "---G-GG---AA-A-");
+    checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
+            "---G-GG---AA-A---");
 
     /*
      * Include only unmapped gaps in dna when realigning (outside the exon
@@ -381,7 +380,7 @@ public class AlignmentUtilsTests
      * the protein alignment gap.
      */
     checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
-            "---GGG---AAA-");
+            "---GGG---AAA---");
   }
 
   /**
@@ -439,7 +438,6 @@ public class AlignmentUtilsTests
   @Test(groups = { "Functional" })
   public void testAlignSequenceAs_withMapping_withUnmappedProtein()
   {
-
     /*
      * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
      */
@@ -447,38 +445,39 @@ public class AlignmentUtilsTests
         1, 1, 3, 3 }, 3, 1);
 
     /*
-     * Expect alignment does nothing (aborts realignment). Change this test
-     * first if different behaviour wanted.
+     * -L- 'aligns' ccc------
      */
-    checkAlignSequenceAs("GGGAAACCCTTTGGG", "-A-L-P-", false, false, map,
-            "GGGAAACCCTTTGGG");
+    checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
+            "gggAAAccc------TTTggg");
   }
 
   /**
    * Helper method that performs and verifies the method under test.
    * 
-   * @param dnaSeq
-   * @param proteinSeq
+   * @param alignee
+   *          the sequence to be realigned
+   * @param alignModel
+   *          the sequence whose alignment is to be copied
    * @param preserveMappedGaps
    * @param preserveUnmappedGaps
    * @param map
    * @param expected
    */
-  protected void checkAlignSequenceAs(final String dnaSeq,
-          final String proteinSeq, final boolean preserveMappedGaps,
+  protected void checkAlignSequenceAs(final String alignee,
+          final String alignModel, final boolean preserveMappedGaps,
           final boolean preserveUnmappedGaps, MapList map,
           final String expected)
   {
-    SequenceI dna = new Sequence("Seq1", dnaSeq);
-    dna.createDatasetSequence();
-    SequenceI protein = new Sequence("Seq1", proteinSeq);
-    protein.createDatasetSequence();
+    SequenceI alignMe = new Sequence("Seq1", alignee);
+    alignMe.createDatasetSequence();
+    SequenceI alignFrom = new Sequence("Seq2", alignModel);
+    alignFrom.createDatasetSequence();
     AlignedCodonFrame acf = new AlignedCodonFrame();
-    acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
+    acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
 
-    AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-',
+    AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
             preserveMappedGaps, preserveUnmappedGaps);
-    assertEquals(expected, dna.getSequenceAsString());
+    assertEquals(expected, alignMe.getSequenceAsString());
   }
 
   /**
@@ -550,7 +549,9 @@ public class AlignmentUtilsTests
     acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
     acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
     acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
-    protein.setCodonFrames(Collections.singleton(acf));
+    ArrayList<AlignedCodonFrame> acfs = new ArrayList<AlignedCodonFrame>();
+    acfs.add(acf);
+    protein.setCodonFrames(acfs);
 
     /*
      * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
@@ -1000,10 +1001,10 @@ public class AlignmentUtilsTests
   }
 
   /**
-   * Test the method that extracts the exon-only part of a dna alignment.
+   * Test the method that extracts the cds-only part of a dna alignment.
    */
   @Test(groups = { "Functional" })
-  public void testMakeExonAlignment()
+  public void testMakeCdsAlignment()
   {
     SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
     SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
@@ -1014,7 +1015,7 @@ public class AlignmentUtilsTests
     pep1.createDatasetSequence();
     pep2.createDatasetSequence();
 
-    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     MapList map = new MapList(new int[] { 4, 6, 10, 12 },
             new int[] { 1, 2 }, 3, 1);
     AlignedCodonFrame acf = new AlignedCodonFrame();
@@ -1026,11 +1027,11 @@ public class AlignmentUtilsTests
     acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
     mappings.add(acf);
 
-    AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[] {
+    AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
         dna1, dna2 }, mappings);
-    assertEquals(2, exons.getSequences().size());
-    assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString());
-    assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString());
+    assertEquals(2, cds.getSequences().size());
+    assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+    assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
 
     /*
      * Verify updated mappings
@@ -1047,14 +1048,14 @@ public class AlignmentUtilsTests
     SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
     assertEquals(1, sr.getResults().size());
     Match m = sr.getResults().get(0);
-    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+    assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
             m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
     // map F to TTT
     sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
     m = sr.getResults().get(0);
-    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+    assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
             m.getSequence());
     assertEquals(4, m.getStart());
     assertEquals(6, m.getEnd());
@@ -1069,34 +1070,34 @@ public class AlignmentUtilsTests
     sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
     assertEquals(1, sr.getResults().size());
     m = sr.getResults().get(0);
-    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
     // map F to TTT
     sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
     m = sr.getResults().get(0);
-    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(4, m.getStart());
     assertEquals(6, m.getEnd());
     // map P to CCC
     sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
     m = sr.getResults().get(0);
-    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+    assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
             m.getSequence());
     assertEquals(7, m.getStart());
     assertEquals(9, m.getEnd());
   }
 
   /**
-   * Test the method that makes an exon-only sequence from a DNA sequence and
-   * its product mapping. Test includes the expected case that the DNA sequence
+   * Test the method that makes a cds-only sequence from a DNA sequence and its
+   * product mapping. Test includes the expected case that the DNA sequence
    * already has a protein product (Uniprot translation) which in turn has an
    * x-ref to the EMBLCDS record.
    */
   @Test(groups = { "Functional" })
-  public void testMakeExonSequences()
+  public void testMakeCdsSequences()
   {
     SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
     SequenceI pep1 = new Sequence("pep1", "GF");
@@ -1117,27 +1118,27 @@ public class AlignmentUtilsTests
     mappings.add(acf);
 
     AlignedCodonFrame newMapping = new AlignedCodonFrame();
-    List<SequenceI> exons = AlignmentUtils.makeExonSequences(dna1, acf,
+    List<SequenceI> cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf,
             newMapping);
-    assertEquals(1, exons.size());
-    SequenceI exon = exons.get(0);
+    assertEquals(1, cdsSeqs.size());
+    SequenceI cdsSeq = cdsSeqs.get(0);
 
-    assertEquals("GGGTTT", exon.getSequenceAsString());
-    assertEquals("dna1|A12345", exon.getName());
-    assertEquals(1, exon.getDBRefs().length);
-    DBRefEntry cdsRef = exon.getDBRefs()[0];
+    assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+    assertEquals("dna1|A12345", cdsSeq.getName());
+    assertEquals(1, cdsSeq.getDBRefs().length);
+    DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
     assertEquals("EMBLCDS", cdsRef.getSource());
     assertEquals("2", cdsRef.getVersion());
     assertEquals("A12345", cdsRef.getAccessionId());
   }
 
   /**
-   * Test the method that makes an exon-only alignment from a DNA sequence and
-   * its product mappings, for the case where there are multiple exon mappings
-   * to different protein products.
+   * Test the method that makes a cds-only alignment from a DNA sequence and its
+   * product mappings, for the case where there are multiple exon mappings to
+   * different protein products.
    */
   @Test(groups = { "Functional" })
-  public void testMakeExonAlignment_multipleProteins()
+  public void testMakeCdsAlignment_multipleProteins()
   {
     SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
     SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
@@ -1159,7 +1160,7 @@ public class AlignmentUtilsTests
      * convenience so results are in the input order. There is no assertion that
      * the generated exon sequences are in any particular order.
      */
-    Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     // map ...GGG...TTT to GF
     MapList map = new MapList(new int[] { 4, 6, 10, 12 },
             new int[] { 1, 2 }, 3, 1);
@@ -1183,82 +1184,82 @@ public class AlignmentUtilsTests
      * Create the Exon alignment; also replaces the dna-to-protein mappings with
      * exon-to-protein and exon-to-dna mappings
      */
-    AlignmentI exal = AlignmentUtils.makeExonAlignment(
+    AlignmentI exal = AlignmentUtils.makeCdsAlignment(
             new SequenceI[] { dna1 }, mappings);
 
     /*
-     * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively
+     * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
      */
-    List<SequenceI> exons = exal.getSequences();
-    assertEquals(3, exons.size());
-
-    SequenceI exon = exons.get(0);
-    assertEquals("GGGTTT", exon.getSequenceAsString());
-    assertEquals("dna1|A12345", exon.getName());
-    assertEquals(1, exon.getDBRefs().length);
-    DBRefEntry cdsRef = exon.getDBRefs()[0];
+    List<SequenceI> cds = exal.getSequences();
+    assertEquals(3, cds.size());
+
+    SequenceI cdsSeq = cds.get(0);
+    assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
+    assertEquals("dna1|A12345", cdsSeq.getName());
+    assertEquals(1, cdsSeq.getDBRefs().length);
+    DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
     assertEquals("EMBLCDS", cdsRef.getSource());
     assertEquals("2", cdsRef.getVersion());
     assertEquals("A12345", cdsRef.getAccessionId());
 
-    exon = exons.get(1);
-    assertEquals("aaaccc", exon.getSequenceAsString());
-    assertEquals("dna1|A12346", exon.getName());
-    assertEquals(1, exon.getDBRefs().length);
-    cdsRef = exon.getDBRefs()[0];
+    cdsSeq = cds.get(1);
+    assertEquals("aaaccc", cdsSeq.getSequenceAsString());
+    assertEquals("dna1|A12346", cdsSeq.getName());
+    assertEquals(1, cdsSeq.getDBRefs().length);
+    cdsRef = cdsSeq.getDBRefs()[0];
     assertEquals("EMBLCDS", cdsRef.getSource());
     assertEquals("3", cdsRef.getVersion());
     assertEquals("A12346", cdsRef.getAccessionId());
 
-    exon = exons.get(2);
-    assertEquals("aaaTTT", exon.getSequenceAsString());
-    assertEquals("dna1|A12347", exon.getName());
-    assertEquals(1, exon.getDBRefs().length);
-    cdsRef = exon.getDBRefs()[0];
+    cdsSeq = cds.get(2);
+    assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
+    assertEquals("dna1|A12347", cdsSeq.getName());
+    assertEquals(1, cdsSeq.getDBRefs().length);
+    cdsRef = cdsSeq.getDBRefs()[0];
     assertEquals("EMBLCDS", cdsRef.getSource());
     assertEquals("4", cdsRef.getVersion());
     assertEquals("A12347", cdsRef.getAccessionId());
 
     /*
-     * Verify there are mappings from each exon sequence to its protein product
+     * Verify there are mappings from each cds sequence to its protein product
      * and also to its dna source
      */
     Iterator<AlignedCodonFrame> newMappingsIterator = mappings.iterator();
 
     // mappings for dna1 - exon1 - pep1
-    AlignedCodonFrame exonMapping = newMappingsIterator.next();
-    List<Mapping> dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    AlignedCodonFrame cdsMapping = newMappingsIterator.next();
+    List<Mapping> dnaMappings = cdsMapping.getMappingsForSequence(dna1);
     assertEquals(1, dnaMappings.size());
-    assertSame(exons.get(0).getDatasetSequence(), dnaMappings.get(0)
+    assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
             .getTo());
     assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
             .get(0).getMap().getToPosition(1));
-    List<Mapping> peptideMappings = exonMapping
+    List<Mapping> peptideMappings = cdsMapping
             .getMappingsForSequence(pep1);
     assertEquals(1, peptideMappings.size());
     assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
 
-    // mappings for dna1 - exon2 - pep2
-    exonMapping = newMappingsIterator.next();
-    dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    // mappings for dna1 - cds2 - pep2
+    cdsMapping = newMappingsIterator.next();
+    dnaMappings = cdsMapping.getMappingsForSequence(dna1);
     assertEquals(1, dnaMappings.size());
-    assertSame(exons.get(1).getDatasetSequence(), dnaMappings.get(0)
+    assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0)
             .getTo());
     assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
             .get(0).getMap().getToPosition(4));
-    peptideMappings = exonMapping.getMappingsForSequence(pep2);
+    peptideMappings = cdsMapping.getMappingsForSequence(pep2);
     assertEquals(1, peptideMappings.size());
     assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
 
-    // mappings for dna1 - exon3 - pep3
-    exonMapping = newMappingsIterator.next();
-    dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    // mappings for dna1 - cds3 - pep3
+    cdsMapping = newMappingsIterator.next();
+    dnaMappings = cdsMapping.getMappingsForSequence(dna1);
     assertEquals(1, dnaMappings.size());
-    assertSame(exons.get(2).getDatasetSequence(), dnaMappings.get(0)
+    assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0)
             .getTo());
     assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
             .get(0).getMap().getToPosition(4));
-    peptideMappings = exonMapping.getMappingsForSequence(pep3);
+    peptideMappings = cdsMapping.getMappingsForSequence(pep3);
     assertEquals(1, peptideMappings.size());
     assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
   }
@@ -1303,4 +1304,110 @@ public class AlignmentUtilsTests
     assertEquals(40, map.getFromLowest());
     assertEquals(48, map.getFromHighest());
   }
+
+  /**
+   * Test for the alignSequenceAs method where we have protein mapped to protein
+   */
+  @Test(groups = { "Functional" })
+  public void testAlignSequenceAs_mappedProteinProtein()
+  {
+  
+    SequenceI alignMe = new Sequence("Match", "MGAASEV");
+    alignMe.createDatasetSequence();
+    SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
+    alignFrom.createDatasetSequence();
+
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    // this is like a domain or motif match of part of a peptide sequence
+    MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
+    acf.addMap(alignFrom.getDatasetSequence(),
+            alignMe.getDatasetSequence(), map);
+    
+    AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
+            true);
+    assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
+  }
+
+  /**
+   * Test for the alignSequenceAs method where there are trailing unmapped
+   * residues in the model sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testAlignSequenceAs_withTrailingPeptide()
+  {
+    // map first 3 codons to KPF; G is a trailing unmapped residue
+    MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
+  
+    checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
+            "AAA---CCCTTT---");
+  }
+
+  @Test(groups = { "Functional" })
+  public void testTransferFeatures()
+  {
+    SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
+    SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
+
+    // no overlap
+    dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f,
+            null));
+    // partial overlap - to [1, 1]
+    dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f,
+            null));
+    // exact overlap - to [1, 3]
+    dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f,
+            null));
+    // spanning overlap - to [2, 5]
+    dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
+            null));
+    // exactly overlaps whole mapped range [1, 6]
+    dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
+            null));
+    // no overlap (internal)
+    dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f,
+            null));
+    // no overlap (3' end)
+    dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15,
+            7f, null));
+    // overlap (3' end) - to [6, 6]
+    dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
+            8f, null));
+    // extended overlap - to [6, +]
+    dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13,
+            9f, null));
+
+    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 6 }, 1, 1);
+
+    /*
+     * behaviour of transferFeatures depends on MapList.locateInTo()
+     * if start and end positions are mapped, returns the mapped region
+     * if either is not mapped, does _not_ search for overlapped region 
+     */
+    AlignmentUtils.transferFeatures(dna, cds, map);
+    SequenceFeature[] sfs = cds.getSequenceFeatures();
+    assertEquals(4, sfs.length);
+
+    SequenceFeature sf = sfs[0];
+    assertEquals("type3", sf.getType());
+    assertEquals("desc3", sf.getDescription());
+    assertEquals(3f, sf.getScore());
+    assertEquals(1, sf.getBegin());
+    assertEquals(3, sf.getEnd());
+
+    sf = sfs[1];
+    assertEquals("type4", sf.getType());
+    assertEquals(2, sf.getBegin());
+    assertEquals(5, sf.getEnd());
+
+    sf = sfs[2];
+    assertEquals("type5", sf.getType());
+    assertEquals(1, sf.getBegin());
+    assertEquals(6, sf.getEnd());
+
+    sf = sfs[3];
+    assertEquals("type8", sf.getType());
+    assertEquals(6, sf.getBegin());
+    assertEquals(6, sf.getEnd());
+  }
 }
index 8f878f0..9a4c357 100644 (file)
@@ -457,4 +457,58 @@ public class DnaTest
     assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString());
     assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString());
   }
+
+  /**
+   * Test dna complementing
+   */
+  @Test(groups = "Functional")
+  public void testGetComplement()
+  {
+    assertEquals('t', Dna.getComplement('a'));
+    assertEquals('T', Dna.getComplement('A'));
+    assertEquals('a', Dna.getComplement('t'));
+    assertEquals('A', Dna.getComplement('T'));
+    assertEquals('c', Dna.getComplement('g'));
+    assertEquals('C', Dna.getComplement('G'));
+    assertEquals('g', Dna.getComplement('c'));
+    assertEquals('G', Dna.getComplement('C'));
+    // note uU --> aA but not vice versa
+    assertEquals('a', Dna.getComplement('u'));
+    assertEquals('A', Dna.getComplement('U'));
+    // ambiguity codes, see http://www.bioinformatics.org/sms/iupac.html
+    assertEquals('r', Dna.getComplement('y'));
+    assertEquals('R', Dna.getComplement('Y'));
+    assertEquals('y', Dna.getComplement('r'));
+    assertEquals('Y', Dna.getComplement('R'));
+    assertEquals('k', Dna.getComplement('m'));
+    assertEquals('K', Dna.getComplement('M'));
+    assertEquals('m', Dna.getComplement('k'));
+    assertEquals('M', Dna.getComplement('K'));
+    assertEquals('b', Dna.getComplement('v'));
+    assertEquals('B', Dna.getComplement('V'));
+    assertEquals('v', Dna.getComplement('b'));
+    assertEquals('V', Dna.getComplement('B'));
+    assertEquals('d', Dna.getComplement('h'));
+    assertEquals('D', Dna.getComplement('H'));
+    assertEquals('h', Dna.getComplement('d'));
+    assertEquals('H', Dna.getComplement('D'));
+    assertEquals('Q', Dna.getComplement('Q'));
+  }
+
+  @Test(groups = "Functional")
+  public void testReverseSequence()
+  {
+    String seq = "AcGtUrYkMbVdHNX";
+
+    // reverse:
+    SequenceI reversed = Dna.reverseSequence("Seq1", seq, false);
+    assertEquals(new StringBuilder(seq).reverse()
+            .toString(), reversed.getSequenceAsString());
+    assertEquals("Seq1|rev", reversed.getName());
+
+    // reverse complement:
+    SequenceI revcomp = Dna.reverseSequence("Seq1", seq, true);
+    assertEquals("XNDhBvKmRyAaCgT", revcomp.getSequenceAsString());
+    assertEquals("Seq1|revcomp", revcomp.getName());
+  }
 }
index a0757cc..989ed7c 100644 (file)
 package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
 import jalview.util.MapList;
 
 import java.util.Arrays;
+import java.util.List;
 
 import org.testng.annotations.Test;
 
@@ -88,7 +93,7 @@ public class AlignedCodonFrameTest
 
     final Sequence aseq1 = new Sequence("Seq1", "-P-R");
     aseq1.createDatasetSequence();
-    final Sequence aseq2 = new Sequence("Seq2", "-LY-");
+    final Sequence aseq2 = new Sequence("Seq2", "-LY-Q");
     aseq2.createDatasetSequence();
 
     /*
@@ -100,6 +105,7 @@ public class AlignedCodonFrameTest
 
     /*
      * Set up the mappings for the exons (upper-case bases)
+     * Note residue Q is unmapped
      */
     MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
         1, 2 }, 3, 1);
@@ -108,14 +114,19 @@ public class AlignedCodonFrameTest
             3, 1);
     acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map);
 
-    assertEquals("[2, 4]",
-            Arrays.toString(acf.getMappedRegion(seq1, aseq1, 1)));
-    assertEquals("[6, 6, 8, 9]",
-            Arrays.toString(acf.getMappedRegion(seq1, aseq1, 2)));
-    assertEquals("[1, 2, 4, 4]",
-            Arrays.toString(acf.getMappedRegion(seq2, aseq2, 1)));
-    assertEquals("[5, 5, 7, 8]",
-            Arrays.toString(acf.getMappedRegion(seq2, aseq2, 2)));
+    assertArrayEquals(new int[] { 2, 4 },
+            acf.getMappedRegion(seq1, aseq1, 1));
+    assertArrayEquals(new int[] { 6, 6, 8, 9 },
+            acf.getMappedRegion(seq1, aseq1, 2));
+    assertArrayEquals(new int[] { 1, 2, 4, 4 },
+            acf.getMappedRegion(seq2, aseq2, 1));
+    assertArrayEquals(new int[] { 5, 5, 7, 8 },
+            acf.getMappedRegion(seq2, aseq2, 2));
+
+    /*
+     * No mapping from seq2 to Q
+     */
+    assertNull(acf.getMappedRegion(seq2, aseq2, 3));
 
     /*
      * No mapping from sequence 1 to sequence 2
@@ -124,11 +135,11 @@ public class AlignedCodonFrameTest
   }
 
   @Test(groups = { "Functional" })
-  public void testGetMappedCodon()
+  public void testGetMappedCodons()
   {
     final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
     seq1.createDatasetSequence();
-    final Sequence aseq1 = new Sequence("Seq1", "-P-R");
+    final Sequence aseq1 = new Sequence("Seq1", "-V-L");
     aseq1.createDatasetSequence();
 
     /*
@@ -136,7 +147,42 @@ public class AlignedCodonFrameTest
      */
     AlignedCodonFrame acf = new AlignedCodonFrame();
 
-    assertNull(acf.getMappedCodon(seq1.getDatasetSequence(), 0));
+    assertNull(acf.getMappedCodons(seq1.getDatasetSequence(), 0));
+
+    /*
+     * Set up the mappings for the exons (upper-case bases)
+     */
+    MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
+        1, 2 }, 3, 1);
+    acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+
+    assertEquals(1, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
+            .size());
+    assertEquals(
+            "[G, T, A]",
+            Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+                    1).get(0)));
+    assertEquals(
+            "[C, T, T]",
+            Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+                    2).get(0)));
+  }
+
+  /**
+   * Test for the case where there is more than one variant of the DNA mapping
+   * to a protein sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testGetMappedCodons_dnaVariants()
+  {
+    final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
+    seq1.createDatasetSequence();
+    final Sequence seq2 = new Sequence("Seq2", "c-G-TT-gT-gT-A");
+    seq2.createDatasetSequence();
+    final Sequence aseq1 = new Sequence("Seq1", "-V-L");
+    aseq1.createDatasetSequence();
+
+    AlignedCodonFrame acf = new AlignedCodonFrame();
 
     /*
      * Set up the mappings for the exons (upper-case bases)
@@ -144,23 +190,30 @@ public class AlignedCodonFrameTest
     MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
         1, 2 }, 3, 1);
     acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+    acf.addMap(seq2.getDatasetSequence(), aseq1.getDatasetSequence(), map);
 
-    assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon(
-            aseq1.getDatasetSequence(), 1)));
-    assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
-            aseq1.getDatasetSequence(), 2)));
+    assertEquals(2, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
+            .size());
+    List<char[]> codonsForV = acf.getMappedCodons(
+            aseq1.getDatasetSequence(), 1);
+    assertEquals("[G, T, A]", Arrays.toString(codonsForV.get(0)));
+    assertEquals("[G, T, T]", Arrays.toString(codonsForV.get(1)));
+    List<char[]> codonsForL = acf.getMappedCodons(
+            aseq1.getDatasetSequence(), 2);
+    assertEquals("[C, T, T]", Arrays.toString(codonsForL.get(0)));
+    assertEquals("[T, T, A]", Arrays.toString(codonsForL.get(1)));
   }
 
   /**
    * Test for the case where sequences have start > 1
    */
   @Test(groups = { "Functional" })
-  public void testGetMappedCodon_forSubSequences()
+  public void testGetMappedCodons_forSubSequences()
   {
     final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35);
     seq1.createDatasetSequence();
 
-    final Sequence aseq1 = new Sequence("Seq1", "-P-R", 12, 13);
+    final Sequence aseq1 = new Sequence("Seq1", "-V-L", 12, 13);
     aseq1.createDatasetSequence();
 
     /*
@@ -171,9 +224,228 @@ public class AlignedCodonFrameTest
             new int[] { 12, 13 }, 3, 1);
     acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
 
-    assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon(
-            aseq1.getDatasetSequence(), 12)));
-    assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
-            aseq1.getDatasetSequence(), 13)));
+    assertEquals(
+            "[G, T, A]",
+            Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+                    12).get(0)));
+    assertEquals(
+            "[C, T, T]",
+            Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+                    13).get(0)));
+  }
+
+  @Test(groups = { "Functional" })
+  public void testCouldReplaceSequence()
+  {
+    SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt");
+    SequenceI seq1proxy = new SequenceDummy("Seq1");
+
+    // map to region within sequence is ok
+    assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
+            17));
+    // map to region overlapping sequence is ok
+    assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 5,
+            10));
+    assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 21,
+            26));
+    // map to region before sequence is not ok
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 4,
+            9));
+    // map to region after sequence is not ok
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 22,
+            27));
+
+    /*
+     * test should fail if name doesn't match
+     */
+    seq1proxy.setName("Seq1a");
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
+            17));
+    seq1proxy.setName("Seq1");
+    seq1.setName("Seq1a");
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
+            17));
+
+    /*
+     * a dummy sequence can't replace a real one
+     */
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1proxy, 12,
+            17));
+
+    /*
+     * a dummy sequence can't replace a dummy sequence
+     */
+    SequenceI seq1proxy2 = new SequenceDummy("Seq1");
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy,
+            seq1proxy2, 12, 17));
+
+    /*
+     * a real sequence can't replace a real one
+     */
+    SequenceI seq1a = new Sequence("Seq1/10-21", "aaacccgggttt");
+    assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1a, 12, 17));
+  }
+
+  /**
+   * Tests for the method that tests whether any mapping to a dummy sequence can
+   * be 'realised' to a given real sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testIsRealisableWith()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "tttaaaCCCGGGtttaaa");
+    SequenceI seq2 = new Sequence("Seq2", "PG");
+    SequenceI seq1proxy = new SequenceDummy("Seq1");
+    seq1.createDatasetSequence();
+    seq2.createDatasetSequence();
+    MapList mapList = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
+            3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(seq1proxy, seq2, mapList);
+
+    /*
+     * Seq2 is mapped to SequenceDummy seq1proxy bases 7-12
+     * This is 'realisable' from real sequence Seq1
+     */
+    assertTrue(acf.isRealisableWith(seq1));
+
+    /*
+     * test should fail if name doesn't match
+     */
+    seq1proxy.setName("Seq1a");
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1proxy.setName("Seq1");
+
+    SequenceI seq1ds = seq1.getDatasetSequence();
+    seq1ds.setName("Seq1a");
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1ds.setName("Seq1");
+
+    /*
+     * test should fail if no sequence overlap with mapping of bases 7-12
+     * use artificial start/end values to test this
+     */
+    seq1ds.setStart(1);
+    seq1ds.setEnd(6);
+    // seq1 precedes mapped region:
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1ds.setEnd(7);
+    // seq1 includes first mapped base:
+    assertTrue(acf.isRealisableWith(seq1));
+    seq1ds.setStart(13);
+    seq1ds.setEnd(18);
+    // seq1 follows mapped region:
+    assertFalse(acf.isRealisableWith(seq1));
+    seq1ds.setStart(12);
+    // seq1 includes last mapped base:
+    assertTrue(acf.isRealisableWith(seq1));
+  }
+
+  /**
+   * Tests for the method that converts mappings to a dummy sequence to mappings
+   * to a compatible real sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testRealiseWith()
+  {
+    SequenceI seq1 = new Sequence("Seq1", "tttCAACCCGGGtttaaa");
+    SequenceI seq2 = new Sequence("Seq2", "QPG");
+    SequenceI seq2a = new Sequence("Seq2a", "QPG");
+    SequenceI seq1proxy = new SequenceDummy("Seq1");
+    seq1.createDatasetSequence();
+    seq2.createDatasetSequence();
+    seq2a.createDatasetSequence();
+
+    /*
+     * Make mappings from Seq2 and Seq2a peptides to dummy sequence Seq1
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+
+    // map PG to codons 7-12 (CCCGGG)
+    MapList mapping1 = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
+            3, 1);
+    acf.addMap(seq1proxy, seq2, mapping1);
+    acf.addMap(seq1proxy, seq2a, mapping1);
+
+    // map QP to codons 4-9 (CAACCC)
+    MapList mapping2 = new MapList(new int[] { 4, 9 }, new int[] { 1, 2 },
+            3, 1);
+    acf.addMap(seq1proxy, seq2, mapping2);
+    acf.addMap(seq1proxy, seq2a, mapping2);
+
+    /*
+     * acf now has two mappings: one from Seq1 to Seq2, one from Seq1 to Seq2a
+     */
+    assertEquals(2, acf.getdnaSeqs().length);
+    assertSame(seq1proxy, acf.getdnaSeqs()[0]);
+    assertSame(seq1proxy, acf.getdnaSeqs()[1]);
+    assertEquals(2, acf.getProtMappings().length);
+
+    // 'realise' these mappings with the compatible sequence seq1
+    // two mappings should be updated:
+    assertEquals(2, acf.realiseWith(seq1));
+    assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[0]);
+    assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[1]);
+  }
+
+  /**
+   * Test that the mapped region can be located in either mapping direction.
+   */
+  @Test(groups = { "Functional" })
+  public void testGetMappedRegion_eitherWay()
+  {
+    final Sequence seq1 = new Sequence("Seq1", "AAACCCGGGTTT");
+    seq1.createDatasetSequence();
+    final Sequence seq2 = new Sequence("Seq2", "KPGF");
+    seq2.createDatasetSequence();
+    final Sequence seq3 = new Sequence("Seq3", "QYKPGFSW");
+    seq3.createDatasetSequence();
+
+    /*
+     * map Seq1 to all of Seq2 and part of Seq3
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
+    acf.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
+    map = new MapList(new int[] { 1, 12 }, new int[] { 3, 6 }, 3, 1);
+    acf.addMap(seq1.getDatasetSequence(), seq3.getDatasetSequence(), map);
+
+    /*
+     * map part of Seq3 to Seq2
+     */
+    map = new MapList(new int[] { 3, 6 }, new int[] { 1, 4 }, 1, 1);
+    acf.addMap(seq3.getDatasetSequence(), seq2.getDatasetSequence(), map);
+
+    /*
+     * original case - locate mapped codon for protein position
+     */
+    assertArrayEquals(new int[] { 4, 6 },
+            acf.getMappedRegion(seq1, seq2, 2));
+    assertArrayEquals(new int[] { 7, 9 },
+            acf.getMappedRegion(seq1, seq3, 5));
+    assertNull(acf.getMappedRegion(seq1, seq3, 1));
+
+    /*
+     * locate mapped protein for protein position
+     */
+    assertArrayEquals(new int[] { 4, 4 },
+            acf.getMappedRegion(seq3, seq2, 2));
+
+    /*
+     * reverse location protein-to-protein
+     */
+    assertArrayEquals(new int[] { 2, 2 },
+            acf.getMappedRegion(seq2, seq3, 4));
+
+    /*
+     * reverse location protein-from-nucleotide
+     * any of codon [4, 5, 6] positions map to seq2/2
+     */
+    assertArrayEquals(new int[] { 2, 2 },
+            acf.getMappedRegion(seq2, seq1, 4));
+    assertArrayEquals(new int[] { 2, 2 },
+            acf.getMappedRegion(seq2, seq1, 5));
+    assertArrayEquals(new int[] { 2, 2 },
+            acf.getMappedRegion(seq2, seq1, 6));
   }
 }
index 8abc03e..b4b0e12 100644 (file)
@@ -22,6 +22,9 @@ package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
 import jalview.io.AppletFormatAdapter;
@@ -29,7 +32,9 @@ import jalview.io.FormatAdapter;
 import jalview.util.MapList;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Iterator;
+import java.util.List;
 
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
@@ -245,7 +250,7 @@ public class AlignmentTest
     ((Alignment) al1).alignAs(al2, false, true);
     assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0)
             .getSequenceAsString());
-    assertEquals("---CGT---TAACGA---AGT", al1.getSequenceAt(1)
+    assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1)
             .getSequenceAsString());
   }
 
@@ -385,4 +390,107 @@ public class AlignmentTest
     assertEquals("c--CCGgg-TT--T------AA-A", al1.getSequenceAt(1)
             .getSequenceAsString());
   }
+
+  /**
+   * Tests the Alignment copy constructor: the copy holds new aligned sequence
+   * objects that share the original's dataset sequences, and the copy is not
+   * given a dataset alignment
+   *
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testCopyConstructor() throws IOException
+  {
+    AlignmentI original = loadAlignment(AA_SEQS_1, FormatAdapter.PASTE);
+    // create sequence and alignment datasets
+    original.setDataset(null);
+    List<AlignedCodonFrame> mappings = Arrays
+            .asList(new AlignedCodonFrame());
+    original.getDataset().setCodonFrames(mappings);
+    AlignmentI copy = new Alignment(original);
+
+    // the aligned sequences of the copy are different objects...
+    for (int i = 0; i < 2; i++)
+    {
+      assertFalse(copy.getSequenceAt(i) == original.getSequenceAt(i));
+    }
+    // ...but they refer to the same dataset sequences
+    for (int i = 0; i < 2; i++)
+    {
+      assertSame(copy.getSequenceAt(i).getDatasetSequence(), original
+              .getSequenceAt(i).getDatasetSequence());
+    }
+
+    // TODO should the copy constructor copy the dataset?
+    // or make a new one referring to the same dataset sequences??
+    assertNull(copy.getDataset());
+    // assertArrayEquals(copy.getDataset().getSequencesArray(), original
+    // .getDataset().getSequencesArray());
+  }
+
+  /**
+   * Tests Alignment.createDatasetAlignment(): dataset sequences are created
+   * for any aligned sequence lacking one, the dataset alignment refers to
+   * those dataset sequences, and a mapping added before the dataset existed
+   * is afterwards found both via the alignment and on its dataset
+   *
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testCreateDatasetAlignment() throws IOException
+  {
+    AlignmentI protein = new FormatAdapter().readFile(AA_SEQS_1,
+            AppletFormatAdapter.PASTE, "FASTA");
+    /*
+     * create a dataset sequence on first sequence
+     * leave the second without one
+     */
+    protein.getSequenceAt(0).createDatasetSequence();
+    assertNotNull(protein.getSequenceAt(0).getDatasetSequence());
+    assertNull(protein.getSequenceAt(1).getDatasetSequence());
+
+    /*
+     * add a mapping to the alignment
+     * (there is no alignment dataset yet)
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    protein.addCodonFrame(acf);
+    assertNull(protein.getDataset());
+    assertTrue(protein.getCodonFrames().contains(acf));
+
+    /*
+     * create the alignment dataset
+     * note this creates sequence datasets where missing
+     * as a side-effect (in this case, on the second sequence)
+     */
+    // TODO promote createDatasetAlignment() to AlignmentI
+    ((Alignment) protein).createDatasetAlignment();
+
+    // TODO getDataset() should return AlignmentI not Alignment !!
+    Alignment ds = protein.getDataset();
+
+    // side-effect: dataset created on second sequence
+    assertNotNull(protein.getSequenceAt(1).getDatasetSequence());
+    // dataset alignment has references to dataset sequences
+    assertEquals(ds.getSequenceAt(0), protein.getSequenceAt(0)
+            .getDatasetSequence());
+    assertEquals(ds.getSequenceAt(1), protein.getSequenceAt(1)
+            .getDatasetSequence());
+
+    // codon frames should have been moved to the dataset
+    // getCodonFrames() should delegate to the dataset:
+    assertTrue(protein.getCodonFrames().contains(acf));
+    // prove the codon frames are indeed on the dataset:
+    assertTrue(ds.getCodonFrames().contains(acf));
+  }
+
+  /**
+   * Tests addCodonFrame: a mapping is added once only, mappings are shared
+   * with the dataset alignment once that has been created, and a mapping added
+   * after that point is found on the dataset
+   */
+  @Test(groups = "Functional")
+  public void testAddCodonFrame()
+  {
+    AlignmentI align = new Alignment(new SequenceI[] {});
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    align.addCodonFrame(acf);
+    assertEquals(1, align.getCodonFrames().size());
+    assertTrue(align.getCodonFrames().contains(acf));
+    // can't add the same object twice:
+    align.addCodonFrame(acf);
+    assertEquals(1, align.getCodonFrames().size());
+
+    // create dataset alignment - mappings move to dataset
+    ((Alignment) align).createDatasetAlignment();
+    assertSame(align.getCodonFrames(), align.getDataset().getCodonFrames());
+    assertEquals(1, align.getCodonFrames().size());
+
+    // a mapping added via the alignment is now found on the dataset,
+    // alongside the one added before the dataset was created
+    AlignedCodonFrame acf2 = new AlignedCodonFrame();
+    align.addCodonFrame(acf2);
+    assertTrue(align.getDataset().getCodonFrames().contains(acf));
+    assertTrue(align.getDataset().getCodonFrames().contains(acf2));
+  }
 }
index b0f7fe5..cbecad5 100644 (file)
@@ -57,4 +57,22 @@ public class MappingTest
     assertEquals("[[1, 6], [11, 13], [15, 20]]", result);
   }
 
+  /**
+   * Tests the string form of a Mapping, first without and then with a
+   * sequence
+   */
+  @Test(groups = { "Functional" })
+  public void testToString()
+  {
+    int[] fromRanges = new int[] { 1, 6, 8, 13 };
+    int[] toRanges = new int[] { 4, 7 };
+    MapList codonMap = new MapList(fromRanges, toRanges, 3, 1);
+
+    // with no sequence: the ranges only (note the trailing space)
+    Mapping unattached = new Mapping(codonMap);
+    assertEquals("[ [1, 6] [8, 13] ] To [ [4, 7] ] ",
+            unattached.toString());
+
+    // with a sequence: the sequence name follows the ranges
+    SequenceI seq = new Sequence("Seq1", "");
+    Mapping attached = new Mapping(seq, codonMap);
+    assertEquals("[ [1, 6] [8, 13] ] To [ [4, 7] ] Seq1",
+            attached.toString());
+  }
 }
diff --git a/test/jalview/datamodel/MappingTypeTest.java b/test/jalview/datamodel/MappingTypeTest.java
new file mode 100644 (file)
index 0000000..64dc793
--- /dev/null
@@ -0,0 +1,43 @@
+package jalview.datamodel;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
+
+import jalview.datamodel.MappingType;
+
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the inverse and the from/to ratios of each MappingType
+ */
+public class MappingTypeTest
+{
+
+  /**
+   * The two nucleotide/peptide types are each other's inverse; the
+   * like-to-like types are their own inverse
+   */
+  @Test(groups = "Functional")
+  public void testGetInverse()
+  {
+    MappingType n2p = MappingType.NucleotideToPeptide;
+    MappingType p2n = MappingType.PeptideToNucleotide;
+    MappingType n2n = MappingType.NucleotideToNucleotide;
+    MappingType p2p = MappingType.PeptideToPeptide;
+
+    assertSame(p2n, n2p.getInverse());
+    assertSame(n2p, p2n.getInverse());
+    assertSame(n2n, n2n.getInverse());
+    assertSame(p2p, p2p.getInverse());
+  }
+
+  /**
+   * Only nucleotide-to-peptide has a 'from' ratio of 3; all others have 1
+   */
+  @Test(groups = "Functional")
+  public void testGetFromRatio()
+  {
+    int n2n = MappingType.NucleotideToNucleotide.getFromRatio();
+    assertEquals(1, n2n);
+    int p2n = MappingType.PeptideToNucleotide.getFromRatio();
+    assertEquals(1, p2n);
+    int p2p = MappingType.PeptideToPeptide.getFromRatio();
+    assertEquals(1, p2p);
+    int n2p = MappingType.NucleotideToPeptide.getFromRatio();
+    assertEquals(3, n2p);
+  }
+
+  /**
+   * Only peptide-to-nucleotide has a 'to' ratio of 3; all others have 1
+   */
+  @Test(groups = "Functional")
+  public void testGetToRatio()
+  {
+    int n2n = MappingType.NucleotideToNucleotide.getToRatio();
+    assertEquals(1, n2n);
+    int p2n = MappingType.PeptideToNucleotide.getToRatio();
+    assertEquals(3, p2n);
+    int p2p = MappingType.PeptideToPeptide.getToRatio();
+    assertEquals(1, p2p);
+    int n2p = MappingType.NucleotideToPeptide.getToRatio();
+    assertEquals(1, n2p);
+  }
+}
index 3838ac8..ffcaa26 100644 (file)
@@ -22,8 +22,11 @@ package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.SearchResults.Match;
+
 import org.testng.annotations.Test;
 
 public class SearchResultsTest
@@ -166,4 +169,24 @@ public class SearchResultsTest
     sr2.addResult(seq1, 6, 8);
     assertEquals(sr1.hashCode(), sr2.hashCode());
   }
+  
+  /**
+   * Verify that the SearchResults$Match constructor stores start/end in the
+   * 'forwards' direction, whichever way round they are supplied
+   */
+  @Test(groups = { "Functional" })
+  public void testMatchConstructor()
+  {
+    SequenceI seq1 = new Sequence("", "abcdefghijklm");
+
+    // start before end: stored as given
+    Match forward = new SearchResults().new Match(seq1, 2, 5);
+    assertSame(seq1, forward.getSequence());
+    assertEquals(2, forward.getStart());
+    assertEquals(5, forward.getEnd());
+
+    // now a reverse mapping (start after end): the values are swapped
+    Match reverse = new SearchResults().new Match(seq1, 5, 2);
+    assertSame(seq1, reverse.getSequence());
+    assertEquals(2, reverse.getStart());
+    assertEquals(5, reverse.getEnd());
+  }
 }
index 229d0b1..e263843 100644 (file)
@@ -25,6 +25,7 @@ import static org.testng.AssertJUnit.assertTrue;
 
 import org.testng.annotations.Test;
 
+@Test
 public class SequenceDummyTest
 {
   /**
diff --git a/test/jalview/datamodel/SequenceFeatureTest.java b/test/jalview/datamodel/SequenceFeatureTest.java
new file mode 100644 (file)
index 0000000..d488a76
--- /dev/null
@@ -0,0 +1,65 @@
+package jalview.datamodel;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for SequenceFeature: copy constructor, 'other details' values
+ * and strand
+ */
+public class SequenceFeatureTest
+{
+  /**
+   * Builds the feature used as the starting point of every test
+   */
+  private static SequenceFeature newFeature()
+  {
+    return new SequenceFeature("type", "desc", 22, 33, 12.5f, "group");
+  }
+
+  /**
+   * Tests that the copy constructor copies type, description, begin, end and
+   * the 'other details' values
+   */
+  @Test(groups = { "Functional" })
+  public void testCopyConstructor()
+  {
+    SequenceFeature original = newFeature();
+    original.setValue("STRAND", "+");
+    original.setValue("Note", "Testing");
+    // a distinct Integer instance, so the assertSame below is meaningful
+    Integer count = new Integer(7);
+    original.setValue("Count", count);
+
+    SequenceFeature copy = new SequenceFeature(original);
+    assertEquals("type", copy.getType());
+    assertEquals("desc", copy.getDescription());
+    assertEquals(22, copy.getBegin());
+    assertEquals(33, copy.getEnd());
+    assertEquals("+", copy.getValue("STRAND"));
+    assertEquals("Testing", copy.getValue("Note"));
+    // shallow clone of otherDetails map - contains the same object values!
+    assertSame(count, copy.getValue("Count"));
+  }
+
+  /**
+   * Tests for retrieving a 'miscellaneous details' property value, with or
+   * without a supplied default
+   */
+  @Test(groups = { "Functional" })
+  public void testGetValue()
+  {
+    SequenceFeature feature = newFeature();
+    feature.setValue("STRAND", "+");
+    assertEquals("+", feature.getValue("STRAND"));
+    // keys are case-sensitive
+    assertNull(feature.getValue("strand"));
+    // the supplied default is returned for a key that has no value
+    assertEquals(".", feature.getValue("unknown", "."));
+    Integer fallback = new Integer(27);
+    assertSame(fallback, feature.getValue("Unknown", fallback));
+  }
+
+  /**
+   * Tests the method that returns 1 / -1 / 0 for strand "+" / "-" / other
+   */
+  @Test(groups = { "Functional" })
+  public void testGetStrand()
+  {
+    SequenceFeature feature = newFeature();
+    // no STRAND value has been set yet
+    assertEquals(0, feature.getStrand());
+
+    String[] strands = { "+", "-", "." };
+    int[] expected = { 1, -1, 0 };
+    for (int i = 0; i < strands.length; i++)
+    {
+      feature.setValue("STRAND", strands[i]);
+      assertEquals(expected[i], feature.getStrand());
+    }
+  }
+}
index 9c306a3..dcc8ef7 100644 (file)
 package jalview.datamodel;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.PDBEntry.Type;
+
 import java.util.Arrays;
 import java.util.List;
+import java.util.Vector;
 
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
@@ -155,23 +160,22 @@ public class SequenceTest
     assertEquals(2, anns.length);
     assertSame(annotation, anns[0]);
     assertSame(annotation2, anns[1]);
-
   }
 
   @Test(groups = { "Functional" })
   public void testGetStartGetEnd()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
 
-    seq = new Sequence("test", "--AB-C-DEF--");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    sq = new Sequence("test", "--AB-C-DEF--");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
 
-    seq = new Sequence("test", "----");
-    assertEquals(1, seq.getStart());
-    assertEquals(0, seq.getEnd()); // ??
+    sq = new Sequence("test", "----");
+    assertEquals(1, sq.getStart());
+    assertEquals(0, sq.getEnd()); // ??
   }
 
   /**
@@ -181,24 +185,24 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testFindIndex()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(0, seq.findIndex(0));
-    assertEquals(1, seq.findIndex(1));
-    assertEquals(5, seq.findIndex(5));
-    assertEquals(6, seq.findIndex(6));
-    assertEquals(6, seq.findIndex(9));
-
-    seq = new Sequence("test", "-A--B-C-D-E-F--");
-    assertEquals(2, seq.findIndex(1));
-    assertEquals(5, seq.findIndex(2));
-    assertEquals(7, seq.findIndex(3));
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(0, sq.findIndex(0));
+    assertEquals(1, sq.findIndex(1));
+    assertEquals(5, sq.findIndex(5));
+    assertEquals(6, sq.findIndex(6));
+    assertEquals(6, sq.findIndex(9));
+
+    sq = new Sequence("test", "-A--B-C-D-E-F--");
+    assertEquals(2, sq.findIndex(1));
+    assertEquals(5, sq.findIndex(2));
+    assertEquals(7, sq.findIndex(3));
 
     // before start returns 0
-    assertEquals(0, seq.findIndex(0));
-    assertEquals(0, seq.findIndex(-1));
+    assertEquals(0, sq.findIndex(0));
+    assertEquals(0, sq.findIndex(-1));
 
     // beyond end returns last residue column
-    assertEquals(13, seq.findIndex(99));
+    assertEquals(13, sq.findIndex(99));
 
   }
 
@@ -209,65 +213,65 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testFindPosition()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(6, seq.findPosition(5));
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(6, sq.findPosition(5));
     // assertEquals(-1, seq.findPosition(6)); // fails
 
-    seq = new Sequence("test", "AB-C-D--");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(2, seq.findPosition(1));
+    sq = new Sequence("test", "AB-C-D--");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(2, sq.findPosition(1));
     // gap position 'finds' residue to the right (not the left as per javadoc)
-    assertEquals(3, seq.findPosition(2));
-    assertEquals(3, seq.findPosition(3));
-    assertEquals(4, seq.findPosition(4));
-    assertEquals(4, seq.findPosition(5));
+    assertEquals(3, sq.findPosition(2));
+    assertEquals(3, sq.findPosition(3));
+    assertEquals(4, sq.findPosition(4));
+    assertEquals(4, sq.findPosition(5));
     // returns 1 more than sequence length if off the end ?!?
-    assertEquals(5, seq.findPosition(6));
-    assertEquals(5, seq.findPosition(7));
-
-    seq = new Sequence("test", "--AB-C-DEF--");
-    assertEquals(1, seq.findPosition(0));
-    assertEquals(1, seq.findPosition(1));
-    assertEquals(1, seq.findPosition(2));
-    assertEquals(2, seq.findPosition(3));
-    assertEquals(3, seq.findPosition(4));
-    assertEquals(3, seq.findPosition(5));
-    assertEquals(4, seq.findPosition(6));
-    assertEquals(4, seq.findPosition(7));
-    assertEquals(5, seq.findPosition(8));
-    assertEquals(6, seq.findPosition(9));
-    assertEquals(7, seq.findPosition(10));
-    assertEquals(7, seq.findPosition(11));
+    assertEquals(5, sq.findPosition(6));
+    assertEquals(5, sq.findPosition(7));
+
+    sq = new Sequence("test", "--AB-C-DEF--");
+    assertEquals(1, sq.findPosition(0));
+    assertEquals(1, sq.findPosition(1));
+    assertEquals(1, sq.findPosition(2));
+    assertEquals(2, sq.findPosition(3));
+    assertEquals(3, sq.findPosition(4));
+    assertEquals(3, sq.findPosition(5));
+    assertEquals(4, sq.findPosition(6));
+    assertEquals(4, sq.findPosition(7));
+    assertEquals(5, sq.findPosition(8));
+    assertEquals(6, sq.findPosition(9));
+    assertEquals(7, sq.findPosition(10));
+    assertEquals(7, sq.findPosition(11));
   }
 
   @Test(groups = { "Functional" })
   public void testDeleteChars()
   {
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    seq.deleteChars(2, 3);
-    assertEquals("ABDEF", seq.getSequenceAsString());
-    assertEquals(1, seq.getStart());
-    assertEquals(5, seq.getEnd());
-
-    seq = new Sequence("test", "ABCDEF");
-    seq.deleteChars(0, 2);
-    assertEquals("CDEF", seq.getSequenceAsString());
-    assertEquals(3, seq.getStart());
-    assertEquals(6, seq.getEnd());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    sq.deleteChars(2, 3);
+    assertEquals("ABDEF", sq.getSequenceAsString());
+    assertEquals(1, sq.getStart());
+    assertEquals(5, sq.getEnd());
+
+    sq = new Sequence("test", "ABCDEF");
+    sq.deleteChars(0, 2);
+    assertEquals("CDEF", sq.getSequenceAsString());
+    assertEquals(3, sq.getStart());
+    assertEquals(6, sq.getEnd());
   }
 
   @Test(groups = { "Functional" })
   public void testInsertCharAt()
   {
     // non-static methods:
-    SequenceI seq = new Sequence("test", "ABCDEF");
-    seq.insertCharAt(0, 'z');
-    assertEquals("zABCDEF", seq.getSequenceAsString());
-    seq.insertCharAt(2, 2, 'x');
-    assertEquals("zAxxBCDEF", seq.getSequenceAsString());
+    SequenceI sq = new Sequence("test", "ABCDEF");
+    sq.insertCharAt(0, 'z');
+    assertEquals("zABCDEF", sq.getSequenceAsString());
+    sq.insertCharAt(2, 2, 'x');
+    assertEquals("zAxxBCDEF", sq.getSequenceAsString());
 
     // for static method see StringUtilsTest
   }
@@ -279,9 +283,9 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGapMap()
   {
-    SequenceI seq = new Sequence("test", "-A--B-CD-E--F-");
-    seq.createDatasetSequence();
-    assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(seq.gapMap()));
+    SequenceI sq = new Sequence("test", "-A--B-CD-E--F-");
+    sq.createDatasetSequence();
+    assertEquals("[1, 4, 6, 7, 9, 12]", Arrays.toString(sq.gapMap()));
   }
 
   /**
@@ -291,17 +295,17 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGetSequenceFeatures()
   {
-    SequenceI seq = new Sequence("test", "GATCAT");
-    seq.createDatasetSequence();
+    SequenceI sq = new Sequence("test", "GATCAT");
+    sq.createDatasetSequence();
 
-    assertNull(seq.getSequenceFeatures());
+    assertNull(sq.getSequenceFeatures());
 
     /*
      * SequenceFeature on sequence
      */
     SequenceFeature sf = new SequenceFeature();
-    seq.addSequenceFeature(sf);
-    SequenceFeature[] sfs = seq.getSequenceFeatures();
+    sq.addSequenceFeature(sf);
+    SequenceFeature[] sfs = sq.getSequenceFeatures();
     assertEquals(1, sfs.length);
     assertSame(sf, sfs[0]);
 
@@ -310,16 +314,16 @@ public class SequenceTest
      * sequence
      */
     SequenceFeature sf2 = new SequenceFeature();
-    seq.getDatasetSequence().addSequenceFeature(sf2);
-    sfs = seq.getSequenceFeatures();
+    sq.getDatasetSequence().addSequenceFeature(sf2);
+    sfs = sq.getSequenceFeatures();
     assertEquals(1, sfs.length);
     assertSame(sf, sfs[0]);
 
     /*
      * SequenceFeature on dataset sequence only
      */
-    seq.setSequenceFeatures(null);
-    sfs = seq.getSequenceFeatures();
+    sq.setSequenceFeatures(null);
+    sfs = sq.getSequenceFeatures();
     assertEquals(1, sfs.length);
     assertSame(sf2, sfs[0]);
 
@@ -327,9 +331,9 @@ public class SequenceTest
      * Corrupt case - no SequenceFeature, dataset's dataset is the original
      * sequence. Test shows no infinite loop results.
      */
-    seq.getDatasetSequence().setSequenceFeatures(null);
-    seq.getDatasetSequence().setDatasetSequence(seq); // loop!
-    assertNull(seq.getSequenceFeatures());
+    sq.getDatasetSequence().setSequenceFeatures(null);
+    sq.getDatasetSequence().setDatasetSequence(sq); // loop!
+    assertNull(sq.getSequenceFeatures());
   }
 
   /**
@@ -346,8 +350,8 @@ public class SequenceTest
      * right. Also it returns a non-existent residue position for a gap beyond
      * the sequence.
      */
-    Sequence seq = new Sequence("TestSeq", "AB.C-D E.");
-    int[] map = seq.findPositionMap();
+    Sequence sq = new Sequence("TestSeq", "AB.C-D E.");
+    int[] map = sq.findPositionMap();
     assertEquals(Arrays.toString(new int[] { 1, 2, 3, 3, 4, 4, 5, 5, 6 }),
             Arrays.toString(map));
   }
@@ -358,18 +362,18 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testGetSubsequence()
   {
-    SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
-    seq.createDatasetSequence();
+    SequenceI sq = new Sequence("TestSeq", "ABCDEFG");
+    sq.createDatasetSequence();
 
     // positions are base 0, end position is exclusive
-    SequenceI subseq = seq.getSubSequence(2, 4);
+    SequenceI subseq = sq.getSubSequence(2, 4);
 
     assertEquals("CD", subseq.getSequenceAsString());
     // start/end are base 1 positions
     assertEquals(3, subseq.getStart());
     assertEquals(4, subseq.getEnd());
     // subsequence shares the full dataset sequence
-    assertSame(seq.getDatasetSequence(), subseq.getDatasetSequence());
+    assertSame(sq.getDatasetSequence(), subseq.getDatasetSequence());
   }
 
   /**
@@ -378,13 +382,13 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_existingDataset()
   {
-    SequenceI seq = new Sequence("Seq1", "CD");
-    seq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
-    seq.setStart(3);
-    seq.setEnd(4);
-    SequenceI derived = seq.deriveSequence();
+    SequenceI sq = new Sequence("Seq1", "CD");
+    sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
+    sq.setStart(3);
+    sq.setEnd(4);
+    SequenceI derived = sq.deriveSequence();
     assertEquals("CD", derived.getSequenceAsString());
-    assertSame(seq.getDatasetSequence(), derived.getDatasetSequence());
+    assertSame(sq.getDatasetSequence(), derived.getDatasetSequence());
   }
 
   /**
@@ -393,10 +397,10 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_noDatasetUngapped()
   {
-    SequenceI seq = new Sequence("Seq1", "ABCDEF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    SequenceI derived = seq.deriveSequence();
+    SequenceI sq = new Sequence("Seq1", "ABCDEF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    SequenceI derived = sq.deriveSequence();
     assertEquals("ABCDEF", derived.getSequenceAsString());
     assertEquals("ABCDEF", derived.getDatasetSequence()
             .getSequenceAsString());
@@ -408,13 +412,108 @@ public class SequenceTest
   @Test(groups = { "Functional" })
   public void testDeriveSequence_noDatasetGapped()
   {
-    SequenceI seq = new Sequence("Seq1", "AB-C.D EF");
-    assertEquals(1, seq.getStart());
-    assertEquals(6, seq.getEnd());
-    assertNull(seq.getDatasetSequence());
-    SequenceI derived = seq.deriveSequence();
+    SequenceI sq = new Sequence("Seq1", "AB-C.D EF");
+    assertEquals(1, sq.getStart());
+    assertEquals(6, sq.getEnd());
+    assertNull(sq.getDatasetSequence());
+    SequenceI derived = sq.deriveSequence();
     assertEquals("AB-C.D EF", derived.getSequenceAsString());
     assertEquals("ABCDEF", derived.getDatasetSequence()
             .getSequenceAsString());
   }
+
+  /**
+   * Tests the copy constructor for a sequence that has no dataset sequence;
+   * here the database reference is copied rather than shared
+   */
+  @Test(groups = { "Functional" })
+  public void testCopyConstructor_noDataset()
+  {
+    SequenceI original = new Sequence("Seq1", "AB-C.D EF");
+    original.setDescription("description");
+    AlignmentAnnotation ann = new AlignmentAnnotation("label", "desc",
+            1.3d);
+    original.addAlignmentAnnotation(ann);
+    SequenceFeature feature = new SequenceFeature("type", "desc", 22, 33,
+            12.4f, "group");
+    original.addSequenceFeature(feature);
+    PDBEntry pdb = new PDBEntry("1A70", "B", Type.PDB, "File");
+    original.addPDBId(pdb);
+    DBRefEntry dbref = new DBRefEntry("EMBL", "1.2", "AZ12345");
+    original.addDBRef(dbref);
+
+    SequenceI copy = new Sequence(original);
+    assertNull(copy.getDatasetSequence());
+    verifyCopiedSequence(original, copy);
+
+    // copy has a copy of the DBRefEntry
+    // this is murky - DBrefs are only copied for dataset sequences
+    // where the test for 'dataset sequence' is 'dataset is null'
+    // but that doesn't distinguish it from an aligned sequence
+    // which has not yet generated a dataset sequence
+    // NB getDBRef looks inside dataset sequence if not null
+    DBRefEntry[] copiedRefs = copy.getDBRefs();
+    assertEquals(1, copiedRefs.length);
+    DBRefEntry originalRef = original.getDBRefs()[0];
+    assertFalse(copiedRefs[0] == originalRef);
+    assertTrue(copiedRefs[0].equals(originalRef));
+  }
+
+  /**
+   * Tests the copy constructor for a sequence that has a dataset sequence;
+   * the copy shares that dataset sequence and so sees the same database
+   * reference objects
+   */
+  @Test(groups = { "Functional" })
+  public void testCopyConstructor_withDataset()
+  {
+    SequenceI original = new Sequence("Seq1", "AB-C.D EF");
+    original.createDatasetSequence();
+    original.setDescription("description");
+    AlignmentAnnotation ann = new AlignmentAnnotation("label", "desc",
+            1.3d);
+    original.addAlignmentAnnotation(ann);
+    SequenceFeature feature = new SequenceFeature("type", "desc", 22, 33,
+            12.4f, "group");
+    original.addSequenceFeature(feature);
+    PDBEntry pdb = new PDBEntry("1A70", "B", Type.PDB, "File");
+    original.addPDBId(pdb);
+    // here we add DBRef to the dataset sequence:
+    DBRefEntry dbref = new DBRefEntry("EMBL", "1.2", "AZ12345");
+    original.getDatasetSequence().addDBRef(dbref);
+
+    SequenceI copy = new Sequence(original);
+    SequenceI copyDataset = copy.getDatasetSequence();
+    assertNotNull(copyDataset);
+    assertSame(copyDataset, original.getDatasetSequence());
+    verifyCopiedSequence(original, copy);
+
+    // getDBRef looks inside dataset sequence and this is shared,
+    // so holds the same dbref objects
+    DBRefEntry[] copiedRefs = copy.getDBRefs();
+    assertEquals(1, copiedRefs.length);
+    assertSame(copiedRefs[0], original.getDBRefs()[0]);
+  }
+
+  /**
+   * Helper to make assertions about a copied sequence: name, description,
+   * start, end and sequence string match the original, and the single
+   * annotation, sequence feature and PDB entry are each a distinct object
+   * from the original's. The feature and PDB entry must be equal to the
+   * original's; the annotation must match on label, description and score.
+   *
+   * @param seq1
+   *          the sequence that was copied (source of expected values)
+   * @param copy
+   *          the sequence produced by the copy constructor
+   */
+  protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy)
+  {
+    // verify basic properties (expected value first, so that any failure
+    // message reports expected and actual the right way round):
+    assertEquals(seq1.getName(), copy.getName());
+    assertEquals(seq1.getDescription(), copy.getDescription());
+    assertEquals(seq1.getStart(), copy.getStart());
+    assertEquals(seq1.getEnd(), copy.getEnd());
+    assertEquals(seq1.getSequenceAsString(), copy.getSequenceAsString());
+
+    // copy has a copy of the annotation:
+    AlignmentAnnotation[] anns = copy.getAnnotation();
+    assertEquals(1, anns.length);
+    assertFalse(anns[0] == seq1.getAnnotation()[0]);
+    assertEquals(seq1.getAnnotation()[0].label, anns[0].label);
+    assertEquals(seq1.getAnnotation()[0].description, anns[0].description);
+    assertEquals(seq1.getAnnotation()[0].score, anns[0].score);
+
+    // copy has a copy of the sequence feature:
+    SequenceFeature[] sfs = copy.getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]);
+    assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0]));
+
+    // copy has a copy of the PDB entry
+    Vector<PDBEntry> pdbs = copy.getAllPDBEntries();
+    assertEquals(1, pdbs.size());
+    assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0));
+    assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0)));
+  }
 }
diff --git a/test/jalview/ext/ensembl/ENSG00000157764.gff b/test/jalview/ext/ensembl/ENSG00000157764.gff
new file mode 100644 (file)
index 0000000..21cef29
--- /dev/null
@@ -0,0 +1,56 @@
+##gff-version 3
+# retrieved via http://rest.ensembl.org/overlap/id/ENSG00000157764?feature=gene&feature=transcript&feature=cds&content-type=text/x-gff3
+##sequence-region   7 140719327 140924764
+7      ensembl_havana  gene    140719327       140722790       .       +       .       ID=gene:ENSG00000090266;assembly_name=GRCh38;biotype=protein_coding;description=NADH dehydrogenase (ubiquinone) 1 beta subcomplex%2C 2%2C 8kDa [Source:HGNC Symbol%3BAcc:HGNC:7697];external_name=NDUFB2;logic_name=ensembl_havana_gene;version=11
+7      ensembl_havana  gene    140719327       140924764       .       -       .       ID=gene:ENSG00000157764;assembly_name=GRCh38;biotype=protein_coding;description=B-Raf proto-oncogene%2C serine/threonine kinase [Source:HGNC Symbol%3BAcc:HGNC:1097];external_name=BRAF;logic_name=ensembl_havana_gene;version=11
+7      ensembl snRNA_gene      140884072       140884178       .       +       .       ID=gene:ENSG00000271932;assembly_name=GRCh38;biotype=snRNA;description=RNA%2C U6 small nuclear 85%2C pseudogene [Source:HGNC Symbol%3BAcc:HGNC:47048];external_name=RNU6-85P;logic_name=ncrna;version=1
+7      ensembl_havana  transcript      140719327       140722790       .       +       .       ID=transcript:ENST00000476279;Parent=gene:ENSG00000090266;assembly_name=GRCh38;biotype=protein_coding;external_name=NDUFB2-003;logic_name=havana;version=4
+7      ensembl_havana  transcript      140719327       140721955       .       +       .       ID=transcript:ENST00000461457;Parent=gene:ENSG00000090266;assembly_name=GRCh38;biotype=protein_coding;external_name=NDUFB2-004;logic_name=havana;version=1
+7      ensembl_havana  transcript      140719327       140783157       .       -       .       ID=transcript:ENST00000496384;Parent=gene:ENSG00000157764;assembly_name=GRCh38;biotype=protein_coding;external_name=BRAF-003;logic_name=havana;version=5
+7      ensembl_havana  transcript      140734479       140924764       .       -       .       ID=transcript:ENST00000288602;Parent=gene:ENSG00000157764;assembly_name=GRCh38;biotype=protein_coding;external_name=BRAF-001;logic_name=ensembl_havana_transcript;version=9
+7      ensembl_havana  NMD_transcript_variant  140734521       140754211       .       -       .       ID=transcript:ENST00000479537;Parent=gene:ENSG00000157764;assembly_name=GRCh38;biotype=nonsense_mediated_decay;external_name=BRAF-005;logic_name=havana;version=4
+7      ensembl_havana  NMD_transcript_variant  140734597       140924658       .       -       .       ID=transcript:ENST00000497784;Parent=gene:ENSG00000157764;assembly_name=GRCh38;biotype=nonsense_mediated_decay;external_name=BRAF-002;logic_name=havana;version=1
+7      ensembl_havana  aberrant_processed_transcript   140834061       140924709       .       -       .       ID=transcript:ENST00000469930;Parent=gene:ENSG00000157764;assembly_name=GRCh38;biotype=retained_intron;external_name=BRAF-004;logic_name=havana;version=1
+7      ensembl snRNA   140884072       140884178       .       +       .       ID=transcript:ENST00000605989;Parent=gene:ENSG00000271932;assembly_name=GRCh38;biotype=snRNA;external_name=RNU6-85P-201;logic_name=ncrna;version=1
+7      havana  CDS     140696745       140696842       .       +       0       ID=CDS:ENSP00000419087;Parent=transcript:ENST00000476279;assembly_name=GRCh38
+7      havana  CDS     140702866       140703010       .       +       1       ID=CDS:ENSP00000419087;Parent=transcript:ENST00000476279;assembly_name=GRCh38
+7      havana  CDS     140704860       140704934       .       +       0       ID=CDS:ENSP00000419087;Parent=transcript:ENST00000476279;assembly_name=GRCh38
+7      havana  CDS     140696745       140696842       .       +       0       ID=CDS:ENSP00000420062;Parent=transcript:ENST00000461457;assembly_name=GRCh38
+7      havana  CDS     140721552       140721744       .       +       1       ID=CDS:ENSP00000420062;Parent=transcript:ENST00000461457;assembly_name=GRCh38
+7      havana  CDS     140783021       140783157       .       -       2       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140781576       140781693       .       -       0       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140777991       140778075       .       -       2       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140776912       140777088       .       -       1       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140754187       140754233       .       -       1       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140753275       140753393       .       -       2       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140749287       140749418       .       -       0       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140739812       140739946       .       -       0       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140734617       140734770       .       -       0       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      havana  CDS     140726494       140726516       .       -       2       ID=CDS:ENSP00000419060;Parent=transcript:ENST00000496384;assembly_name=GRCh38
+7      ensembl_havana  CDS     140924566       140924703       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140850111       140850212       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140834609       140834872       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140808892       140808995       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140807960       140808062       .       -       1       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140801412       140801560       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140800362       140800481       .       -       1       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140794308       140794467       .       -       1       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140787548       140787584       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140783021       140783157       .       -       2       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140781576       140781693       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140777991       140778075       .       -       2       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140776912       140777088       .       -       1       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140754187       140754233       .       -       1       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140753275       140753393       .       -       2       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140749287       140749418       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140739812       140739946       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      ensembl_havana  CDS     140734597       140734770       .       -       0       ID=CDS:ENSP00000288602;Parent=transcript:ENST00000288602;assembly_name=GRCh38
+7      havana  CDS     140754187       140754211       .       -       0       ID=CDS:ENSP00000418033;Parent=transcript:ENST00000479537;assembly_name=GRCh38
+7      havana  CDS     140753275       140753393       .       -       2       ID=CDS:ENSP00000418033;Parent=transcript:ENST00000479537;assembly_name=GRCh38
+7      havana  CDS     140749287       140749418       .       -       0       ID=CDS:ENSP00000418033;Parent=transcript:ENST00000479537;assembly_name=GRCh38
+7      havana  CDS     140747415       140747447       .       -       0       ID=CDS:ENSP00000418033;Parent=transcript:ENST00000479537;assembly_name=GRCh38
+7      havana  CDS     140924566       140924658       .       -       0       ID=CDS:ENSP00000420119;Parent=transcript:ENST00000497784;assembly_name=GRCh38
+7      havana  CDS     140850111       140850212       .       -       0       ID=CDS:ENSP00000420119;Parent=transcript:ENST00000497784;assembly_name=GRCh38
+7      havana  CDS     140834609       140834872       .       -       0       ID=CDS:ENSP00000420119;Parent=transcript:ENST00000497784;assembly_name=GRCh38
+7      havana  CDS     140808892       140808995       .       -       0       ID=CDS:ENSP00000420119;Parent=transcript:ENST00000497784;assembly_name=GRCh38
+7      havana  CDS     140808295       140808316       .       -       1       ID=CDS:ENSP00000420119;Parent=transcript:ENST00000497784;assembly_name=GRCh38
diff --git a/test/jalview/ext/ensembl/EnsemblRestClientTest.java b/test/jalview/ext/ensembl/EnsemblRestClientTest.java
new file mode 100644 (file)
index 0000000..6f7c1ad
--- /dev/null
@@ -0,0 +1,69 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.AlignmentI;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+/**
+ * Reports whether the Ensembl service is reachable, as judged by
+ * {@code EnsemblRestClient.isEnsemblAvailable()}.
+ */
+public class EnsemblRestClientTest
+{
+
+  /**
+   * Creates an anonymous {@link EnsemblRestClient} whose overridden methods
+   * only return placeholder values (null / false), calls
+   * {@code isEnsemblAvailable()} on it and prints the outcome: to standard
+   * output when available, to standard error otherwise. No assertion is made
+   * on the result.
+   * 
+   * NOTE(review): the TestNG documentation describes suiteName as ignored when
+   * the annotation is not at class level - confirm this method really is
+   * confined to the "live" suite.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    // placeholder overrides; this test only calls isEnsemblAvailable()
+    EnsemblRestClient client = new EnsemblRestClient()
+    {
+      @Override
+      protected String getResponseMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      protected String getRequestMimeType(boolean b)
+      {
+        return null;
+      }
+
+      @Override
+      protected boolean useGetRequest()
+      {
+        return false;
+      }
+
+      @Override
+      protected URL getUrl(List<String> ids) throws MalformedURLException
+      {
+        return null;
+      }
+
+      @Override
+      public AlignmentI getSequenceRecords(String queries) throws Exception
+      {
+        return null;
+      }
+
+      @Override
+      public String getDbName()
+      {
+        return null;
+      }
+    };
+
+    if (!client.isEnsemblAvailable())
+    {
+      System.err
+              .println("Ensembl is DOWN or unreachable ******************* BAD!");
+      return;
+    }
+    System.out.println("Ensembl is UP!");
+  }
+
+}
diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
new file mode 100644 (file)
index 0000000..10ecfe0
--- /dev/null
@@ -0,0 +1,212 @@
+package jalview.ext.ensembl;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceI;
+import jalview.io.AppletFormatAdapter;
+import jalview.io.FastaFile;
+import jalview.io.FileParse;
+
+import java.lang.reflect.Method;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+
+public class EnsemblSeqProxyTest
+{
+  /**
+   * Rows handed out by the "ens_seqs" data provider. Each row holds, in
+   * order: a client instance (EnsemblProtein or EnsemblCdna), the accession
+   * to query, and the FASTA-formatted text that the retrieved sequence is
+   * compared against.
+   */
+  private static final Object[][] allSeqs = new Object[][] {
+      {
+          new EnsemblProtein(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH\n" },
+      {
+          new EnsemblCdna(),
+          "CCDS5863.1",
+          ">CCDS5863.1\n"
+                  + "ATGGCGGCGCTGAGCGGTGGCGGTGGTGGCGGCGCGGAGCCGGGCCAGGCTCTGTTCAAC\n"
+                  + "GGGGACATGGAGCCCGAGGCCGGCGCCGGCGCCGGCGCCGCGGCCTCTTCGGCTGCGGAC\n"
+                  + "CCTGCCATTCCGGAGGAGGTGTGGAATATCAAACAAATGATTAAGTTGACACAGGAACAT\n"
+                  + "ATAGAGGCCCTATTGGACAAATTTGGTGGGGAGCATAATCCACCATCAATATATCTGGAG\n"
+                  + "GCCTATGAAGAATACACCAGCAAGCTAGATGCACTCCAACAAAGAGAACAACAGTTATTG\n"
+                  + "GAATCTCTGGGGAACGGAACTGATTTTTCTGTTTCTAGCTCTGCATCAATGGATACCGTT\n"
+                  + "ACATCTTCTTCCTCTTCTAGCCTTTCAGTGCTACCTTCATCTCTTTCAGTTTTTCAAAAT\n"
+                  + "CCCACAGATGTGGCACGGAGCAACCCCAAGTCACCACAAAAACCTATCGTTAGAGTCTTC\n"
+                  + "CTGCCCAACAAACAGAGGACAGTGGTACCTGCAAGGTGTGGAGTTACAGTCCGAGACAGT\n"
+                  + "CTAAAGAAAGCACTGATGATGAGAGGTCTAATCCCAGAGTGCTGTGCTGTTTACAGAATT\n"
+                  + "CAGGATGGAGAGAAGAAACCAATTGGTTGGGACACTGATATTTCCTGGCTTACTGGAGAA\n"
+                  + "GAATTGCATGTGGAAGTGTTGGAGAATGTTCCACTTACAACACACAACTTTGTACGAAAA\n"
+                  + "ACGTTTTTCACCTTAGCATTTTGTGACTTTTGTCGAAAGCTGCTTTTCCAGGGTTTCCGC\n"
+                  + "TGTCAAACATGTGGTTATAAATTTCACCAGCGTTGTAGTACAGAAGTTCCACTGATGTGT\n"
+                  + "GTTAATTATGACCAACTTGATTTGCTGTTTGTCTCCAAGTTCTTTGAACACCACCCAATA\n"
+                  + "CCACAGGAAGAGGCGTCCTTAGCAGAGACTGCCCTAACATCTGGATCATCCCCTTCCGCA\n"
+                  + "CCCGCCTCGGACTCTATTGGGCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATT\n"
+                  + "CCAATTCCACAGCCCTTCCGACCAGCAGATGAAGATCATCGAAATCAATTTGGGCAACGA\n"
+                  + "GACCGATCCTCATCAGCTCCCAATGTGCATATAAACACAATAGAACCTGTCAATATTGAT\n"
+                  + "GACTTGATTAGAGACCAAGGATTTCGTGGTGATGGAGGATCAACCACAGGTTTGTCTGCT\n"
+                  + "ACCCCCCCTGCCTCATTACCTGGCTCACTAACTAACGTGAAAGCCTTACAGAAATCTCCA\n"
+                  + "GGACCTCAGCGAGAAAGGAAGTCATCTTCATCCTCAGAAGACAGGAATCGAATGAAAACA\n"
+                  + "CTTGGTAGACGGGACTCGAGTGATGATTGGGAGATTCCTGATGGGCAGATTACAGTGGGA\n"
+                  + "CAAAGAATTGGATCTGGATCATTTGGAACAGTCTACAAGGGAAAGTGGCATGGTGATGTG\n"
+                  + "GCAGTGAAAATGTTGAATGTGACAGCACCTACACCTCAGCAGTTACAAGCCTTCAAAAAT\n"
+                  + "GAAGTAGGAGTACTCAGGAAAACACGACATGTGAATATCCTACTCTTCATGGGCTATTCC\n"
+                  + "ACAAAGCCACAACTGGCTATTGTTACCCAGTGGTGTGAGGGCTCCAGCTTGTATCACCAT\n"
+                  + "CTCCATATCATTGAGACCAAATTTGAGATGATCAAACTTATAGATATTGCACGACAGACT\n"
+                  + "GCACAGGGCATGGATTACTTACACGCCAAGTCAATCATCCACAGAGACCTCAAGAGTAAT\n"
+                  + "AATATATTTCTTCATGAAGACCTCACAGTAAAAATAGGTGATTTTGGTCTAGCTACAGTG\n"
+                  + "AAATCTCGATGGAGTGGGTCCCATCAGTTTGAACAGTTGTCTGGATCCATTTTGTGGATG\n"
+                  + "GCACCAGAAGTCATCAGAATGCAAGATAAAAATCCATACAGCTTTCAGTCAGATGTATAT\n"
+                  + "GCATTTGGAATTGTTCTGTATGAATTGATGACTGGACAGTTACCTTATTCAAACATCAAC\n"
+                  + "AACAGGGACCAGATAATTTTTATGGTGGGACGAGGATACCTGTCTCCAGATCTCAGTAAG\n"
+                  + "GTACGGAGTAACTGTCCAAAAGCCATGAAGAGATTAATGGCAGAGTGCCTCAAAAAGAAA\n"
+                  + "AGAGATGAGAGACCACTCTTTCCCCAAATTCTCGCCTCTATTGAGCTGCTGGCCCGCTCA\n"
+                  + "TTGCCAAAAATTCACCGCAGTGCATCAGAACCCTCCTTGAATCGGGCTGGTTTCCAAACA\n"
+                  + "GAGGATTTTAGTCTATATGCTTGTGCTTCTCCAAAAACACCCATCCAGGCAGGGGGATAT\n"
+                  + "GGTGCGTTTCCTGTCCACTGA\n" },
+      {
+          new EnsemblProtein(),
+          "ENSP00000288602",
+          ">ENSP00000288602\n"
+                  + "MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEH\n"
+                  + "IEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTV\n"
+                  + "TSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDS\n"
+                  + "LKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRK\n"
+                  + "TFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPI\n"
+                  + "PQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQR\n"
+                  + "DRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSP\n"
+                  + "GPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDV\n"
+                  + "AVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHH\n"
+                  + "LHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATV\n"
+                  + "KSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNIN\n"
+                  + "NRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARS\n"
+                  + "LPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH" } };
+
+  /**
+   * Data provider "queries": supplies one accession string per row.
+   * 
+   * @param m
+   *          the test method requesting data (not used)
+   * @return single-column rows, one accession each
+   */
+  @DataProvider(name = "queries")
+  public Object[][] createQueryData(Method m)
+  {
+    Object[][] accessions = { { "CCDS5863.1" }, { "ENSP00000288602" } };
+    return accessions;
+  }
+
+  /**
+   * Checks that an accession from the "queries" provider is accepted by
+   * {@code isValidReference} on an {@link EnsemblProtein} instance. The
+   * failure message includes the accession validator.
+   * 
+   * @param query
+   *          the accession to validate
+   * @throws Exception
+   */
+  @Test(dataProvider = "queries")
+  public void testIsValidReference(String query) throws Exception
+  {
+    EnsemblSequenceFetcher fetcher = new EnsemblProtein();
+    boolean accepted = fetcher.isValidReference(query);
+    String failureMessage = "Expected reference string " + query
+            + " to be valid for regex "
+            + fetcher.getAccessionValidator().toString();
+    Assert.assertTrue(accepted, failureMessage);
+  }
+
+  /**
+   * Data provider "ens_seqs": prints the name of the requesting test method
+   * and supplies the rows held in {@code allSeqs}.
+   * 
+   * @param m
+   *          the test method requesting data
+   * @return the {@code allSeqs} table
+   */
+  @DataProvider(name = "ens_seqs")
+  public Object[][] createData(Method m)
+  {
+    String requestingTest = m.getName();
+    System.out.println(requestingTest);
+    return allSeqs;
+  }
+
+  /**
+   * Requests a single accession through the supplied client, parses the reply
+   * as FASTA and compares it with the expected FASTA text: the sequence counts
+   * must agree, and each expected sequence must be matched by name to exactly
+   * one retrieved sequence with an identical sequence string.
+   * 
+   * NOTE(review): the TestNG documentation describes suiteName as ignored when
+   * the annotation is not at class level - confirm this method really is
+   * confined to the "live" suite.
+   * 
+   * @param proxy
+   *          the client used to fetch the sequence
+   * @param sq
+   *          the accession to query
+   * @param fastasq
+   *          the expected result, as FASTA-formatted text
+   * @throws Exception
+   */
+  @Test(dataProvider = "ens_seqs", suiteName = "live")
+  public void testGetOneSeqs(EnsemblRestClient proxy, String sq, String fastasq)
+          throws Exception
+  {
+    // retrieved sequences
+    List<String> ids = Arrays.asList(sq);
+    FileParse reader = proxy.getSequenceReader(ids);
+    SequenceI[] retrieved = new FastaFile(reader).getSeqsAsArray();
+
+    // expected sequences, parsed from the pasted FASTA text
+    SequenceI[] expected = new FastaFile(fastasq, AppletFormatAdapter.PASTE)
+            .getSeqsAsArray();
+
+    Assert.assertEquals(retrieved.length, expected.length,
+            "Different number of sequences retrieved for query " + sq);
+
+    Alignment retrievedAlignment = new Alignment(retrieved);
+    for (int i = 0; i < expected.length; i++)
+    {
+      SequenceI wanted = expected[i];
+      String wantedName = wanted.getName();
+
+      SequenceI[] matches = retrievedAlignment.findSequenceMatch(wantedName);
+      Assert.assertNotNull(matches,
+              "Couldn't find sequences matching expected sequence "
+                      + wantedName);
+      Assert.assertEquals(matches.length, 1,
+              "Expected only one sequence for sequence ID " + wantedName);
+
+      String gotResidues = matches[0].getSequenceAsString();
+      String wantedResidues = wanted.getSequenceAsString();
+      Assert.assertEquals(gotResidues, wantedResidues,
+              "Sequences differ for " + wantedName + "\nExp:"
+                      + wantedResidues + "\nGot:" + gotResidues);
+    }
+  }
+
+  /**
+   * Creates an anonymous {@link EnsemblRestClient} whose overridden methods
+   * only return placeholder values (null / false), calls
+   * {@code isEnsemblAvailable()} on it and prints the outcome to standard
+   * output. No assertion is made on the result.
+   * 
+   * NOTE(review): the TestNG documentation describes suiteName as ignored when
+   * the annotation is not at class level - confirm this method really is
+   * confined to the "live" suite.
+   */
+  @Test(suiteName = "live")
+  public void testLiveCheckEnsembl()
+  {
+    // placeholder overrides; this test only calls isEnsemblAvailable()
+    EnsemblRestClient client = new EnsemblRestClient()
+    {
+      @Override
+      protected String getResponseMimeType()
+      {
+        return null;
+      }
+
+      @Override
+      protected String getRequestMimeType(boolean b)
+      {
+        return null;
+      }
+
+      @Override
+      protected boolean useGetRequest()
+      {
+        return false;
+      }
+
+      @Override
+      protected URL getUrl(List<String> ids) throws MalformedURLException
+      {
+        return null;
+      }
+
+      @Override
+      public AlignmentI getSequenceRecords(String queries) throws Exception
+      {
+        return null;
+      }
+
+      @Override
+      public String getDbName()
+      {
+        return null;
+      }
+    };
+
+    String status;
+    if (client.isEnsemblAvailable())
+    {
+      status = "UP!";
+    }
+    else
+    {
+      status = "DOWN or unreachable ******************* BAD!";
+    }
+    System.out.println("Ensembl is " + status);
+  }
+  // todo lots of tests
+}
\ No newline at end of file
diff --git a/test/jalview/ext/htsjdk/TestHtsContigDb.java b/test/jalview/ext/htsjdk/TestHtsContigDb.java
new file mode 100644 (file)
index 0000000..5e0f99a
--- /dev/null
@@ -0,0 +1,32 @@
+/**
+ * 
+ */
+package jalview.ext.htsjdk;
+
+import jalview.datamodel.SequenceI;
+
+import java.io.File;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Checks that {@link HtsContigDb} can be opened on a FASTA test file and can
+ * return a sequence from it by name.
+ * 
+ * @author jprocter
+ */
+public class TestHtsContigDb
+{
+  /**
+   * Opens the pgmB.fasta test file, then verifies that the database reports
+   * itself as valid and that the entry named "Deminut" can be retrieved with a
+   * non-zero length.
+   * 
+   * @throws Exception
+   */
+  @Test
+  public final void testHTSReferenceSequence() throws Exception
+  {
+    // the path must match the case of the file name on disk (pgmB.fasta);
+    // a lower-case 'b' is not found on case-sensitive file systems
+    HtsContigDb remmadb = new HtsContigDb("REEMADB", new File(
+            "test/jalview/ext/htsjdk/pgmB.fasta"));
+
+    Assert.assertTrue(remmadb.isValid());
+
+    SequenceI sq = remmadb.getSequenceProxy("Deminut");
+    Assert.assertNotNull(sq);
+    Assert.assertNotEquals(0, sq.getLength());
+  }
+
+}
diff --git a/test/jalview/ext/htsjdk/pgmB.fasta b/test/jalview/ext/htsjdk/pgmB.fasta
new file mode 100644 (file)
index 0000000..e759925
--- /dev/null
@@ -0,0 +1,183 @@
+>DDB_G0280897 
+MTDKINNLINQWLKWDKNEITRKEIEQLKENNNEKELLVRLEERIQFGTAGLRGAMRAGF
+SCMNDLTVTQASQGLCEYVIETIEQSKSKGIVIGYDGRHNSYIFAKITAATFKSKGFKVY
+LFSHIVPTPYVSFAVPNLKAAIGVMITASHNPKNDNGYKVYWETGCQINTPHDKGISKKI
+DENLEPWSNVDATSDIKYGNGDDGESMIDPLSVITELYNKNIKEYSVGSKIELANEPIVY
+TAMHGVGGVYAKKAFETFQLKPFIPVAQQIEPDAEFPTVTYPNPEEGKGALKLSIETAEA
+NNSRLILANDPDADRLAVAEKLADGSWKVFNGNEIGVLLADWAWTNRSTLTKGGSTLENN
+KYFMINTAVSSAMLKTMSEKEGFIHQECLTGFKWIGNAAYNAINNNDGTTFLFGYEEAIG
+FQYGDVSFDKDGVRAAAIFAEFALSLYKKGSSVQDHLESMYKRYGYHISKNRYFFCYEPS
+KMVSIFNKIRNDGKYLTKLGDDDDEQFTITRIRDLTTGYDNGYPDCKARLPVSSSTQMIT
+FYFKNGGIATLRGSGTEPKLKYYVEMIGEVKSNVESTLTKAVELVINQLLKPIENQLEPP
+KDD
+>PPL_06716 
+MSNIKELAESWLKWDKNAETRKEIQSLLESDNQSELKSRLEQRIAFGTAGLRGPMKAGFS
+CMNDLTVIQASQGLCIYVEQTLSNSKNSGIVVGYDGRHHSKEFARLTAATFASRGFKVYL
+FSKIVPTPYVVILYLISNYMDCYVHQAFAVPELKASVGVMITASHNPKDDNGYKVYWDNG
+CQINTPHDIRIAMQIDLNLEPWNIDVNELLNGSLVSDPLDTITKSYFGKIAKYSVKNEVK
+LATSEKIVYTAMHGVGGEYAKMAFETFGLPAFIPVDQQIQPDPEFPTVAFPNPEEGKGAL
+KLSIETAERNNSRLILANDPDADRLAVAERQPDGQWKVFNGNEIGVLFADWAWQNARRAD
+STTPAERFCMINTAVSSSMLKTMANKDGYRHEECLTGFKWVGNKARELMDKGYNFLFAYE
+EAIGFMYGDVSLDKDGVRCAPIFAELALTCYQAGKSCQDHLEELYKRYGYHISKNRYFFC
+YDPKKMVAIFDKIRNYGQFPTNCGDFYITRVRDLTVGYDSGYPDHKARLPVSSSTQMITF
+YFENGGIATLRGSGTEPKLKYYVEMIGSDRQLVESTLSQLVEQVINQFLRPVENELTPPK
+DD
+>DFA_03821 
+MTDINQLAQNWLKWDRNPKTHKEIEQLVEAKDENELRARLENRIAFGTAGIVSTTIVQSH
+MNIGPMKAGFANMNDLTVIQASQGLSIYVQETISQAQSKGVVVGYDGRYNSEVFAKLTAA
+TFASKGFKVYLFSKIVPTPFVAFAVPELGASVGVMVTASHNPKDDNGYKVYWDNGCQINT
+PHDKGIAKQIDLNLEPWTINIDKLLSSELVNDPLETISNAYFSKIYSYSVKNRSTPLELA
+NEKVVYTAMHGVGGDYVKKAFETFKLPPYVEVAQQIKPDPAFPTVAFPNPEEGKGALKLS
+IETAESVNSRLILANDPDADRLAVAEKLKDGSWKVFNGNEIGILLADWAWTNAKINHPDV
+PAEKFFMINTAVSSAMLKTMAKKEGYICEETLTGFKWVGNKAKEMIDQGYKFLFAYEEAI
+GFMYGDVSLDKDGVRCAPIFAEYALNLYANGSSCQDHLDHLMQRYGYHISKNRYFFCYEP
+SKMVRIFNDIRKSNNGQFPDKCGPYEIIRIRDLTVDYDTAYPDNKARLPVSTSTQMITFY
+FKNGAIATLRGSGTEPKLKYYVEMIGDNKQEVESTLQQVVQQVIDNFLQPVVNQLTPPKD
+D
+>DLA_10096 
+MDIYTLANKWLEWDKNEKNRKEIQHFVDEKNEQELRERLENRIQFGTAGLRGPMKAGFAN
+MNDLTVIQASQGLALYVKETIDSALTKGVVVGYDGRHNSQTFARLTAATFLSKGFKVYLF
+SKLVPTPFVAFAVPELGASCGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGISKLIDE
+NLVPWTMNLDDLNKSDLVSDPLERVSKSYFTKISKYSVVKSGATIKQEKVVYTPMHGVGG
+DYAAEAFKVFDLHPFIPVELQIKPDAEFPTVAFPNPEEGKGALKLAIETAESNQSRLILA
+NDPDADRLAVAEKQSSDGSWKVFNGNEIGVLFADWAWRKERALFSEGYNCKPSEYTMIST
+AVSSAMLSTMAKKEGFQHEEVLTGFKWVGNAAKQAMDRGQKFLFAYEEAIGFMYGDVSLD
+KDGVRGASIFAELAFDLYQQGSSCQEHLESLYKKYGYHISNNRYFFCYDPKKMVRIFNEI
+RGNNREYVKELGEFKVERIRDLTTGYDTAFPPEFKAQLPTSSSTQMITFYFTNGSIATLR
+GSGTEPKLKYYVESIGSDKLQVQQTLTKLVSLVIEKLLRPKENELTPPKESVGSERLLAL
+LSEVMSTSMKIQVKYNESITEYNIIKGVKLLTQIDVLCQIFKVDANPDRFVLNYRESNLI
+LSEDNLSKLFSNEISSCSSQSQNGSNGELSSLYSSFGENSSNNNNNSTLKFELILAPIYQ
+VDSVLEHLNNSNLIKKRII
+>DPU1265769
+MSMIRSISGVRGVIGQSWTPTLVSNHIIGFTQLLESEKYYNQKQKKIVVGRDSRVSGPWI
+EMIVNGSLISMGYQVIHIDIAATPTVQYMVEKTKSSGGIVITSSHNPVEWNGLKFVGPDG
+LFIAPVECEVLFSLADNPSSFKFPNYDKLGSVVCNTTANKEHIEAIFKLPFISVDKIKEK
+KFKVCLDSVNGAGGPIMSYLLTELGCEVIGINLEPTGLFAHTPEPVPANLGQLCELVKTH
+KADFGIAVDPDVDRCVFIDDKGVPLGEEYTLAMAVELLLGDCGRRGNVCKNLSSSRAIDD
+ICKKYDSQVICAPVGEIQVAKKMQQVNAVIGGEGNGGVMLPDIHIGRDAPVAATLALQLL
+ANRNAASISEFKRTTLPTYEIVKLKAGIEGLDPDAILAEYTKQYENKEGVVINQEDGLKI
+DSADWWVHLRKSNTEHIIRVISEAKNTKEATDIATKFINEIESKRK
+>440792448
+MASRVSGRMRKISDETQQMVNAWLSVDWDPESREHVKGLVAAGKEEELVAHLGRRISFGT
+AGLRGKMKWGFAFMNAVTVTQASQGLCAYLRTVHPCLTDLRERGVIVGHDGRYNSRMFAR
+LTAAVFLSRKIKVHLFRDDVPTPLVAFGVRHLKCAAGVMVTASHNPKEDNGYKVYWANSA
+QITAPHDAQIARAIEANFSIWDRMPDDKAIDEHPLCLDPTTDVCAAYLAAARHWSFRTPQ
+QNAAAQLRVVYTAMHGVGGQSVERIFDAFGLPPVIAVREQHDPDPDFTTVEFPNPEEANG
+CSLRLAMSTADREGAPLILANDPDADRLAVAERQRDSGEWRILDGNEIALLLADWLWRNY
+TERHPEVDRAKIVMLNSTVSSKALAAMAAKEGFHYRETLTGFKWLGNLADELVRAGYTFL
+FAYEVEIGFMIGDMSLDTDGVRAAPVFVEMANHLYERGLTLSDHLDNLYHKYGYYKMAVG
+YYFCHDPRLMDQIFNEIRNDGLYISTCGDHKVQYVRDLTTGFDNSQPHNRAVLPVSSAAH
+MITFTFENECVATFRGSGTEPKLKYYIEVANASNEQLATDLLDSMKQEIIDRFLQPSQNG
+LRPPAAAEDAHNSPHNSGNSPEQMAPARIARDVIHKEIQALQNLEATLGRDFEKVVEIIE
+SRGSGRVIFTGVGKSGIIAQKISASFSSLGISSFFVHATEAAHGDLGVITAEDVIIAISN
+SGNTPELIFIIPSLRVLAGKIIGITSNKDSLLARYSDASIITGKIMEADQHKIAPTASTI
+VCLAIGDALAVTLSARMKFTLPEFGLRHPGGVLGEKVLGKVFQEFAMKGQGRFLRFWKRM
+TNEERDKLRRDFERIDLAELSRIYLQCRSKAEKGAIDPHSLEPLPSHTWVKLHESDPAAV
+AAWRDAGLRALREGKIGVVLMAGGQATRLGMTMPKGFLDLNLPSHKSLYQLHAEKLLRLQ
+DEVRQTFGGGGGDEEVQQQQQQIQIPFYVMTSPEALQQTHQFFIKHQFFGLCPKQVFFFK
+QRSLPCVAPSGEIIMDTKCSVVFSPDGHGGLFVALKDAKAYEDMKRRGVEYVFAFGVDNP
+LCEVADPAYMGYCIQRNVKMGYKVVDRRDPQETAGVVCVRDGVINCVEYSELPESVAELR
+DEQSGELVYNAANMLNLFFTLRFMRKIADNPSLMEYHLAKKRIPFVNDNGVRTEPLVPNG
+WKFEKYLVDCTPYANNSVAVMFVKREEEFAPIKNGWNSEVDSPRSARRLLAAHYRRRIER
+AGGKLAADDPDKMVEVSPLVTDRKLAQLLQDKHLVTGPAVLQ
+>ENY64621.1 
+MALNNYIKKTEMDYLYEQAALWLKWDKTPETRKEIEDLVASKNEEELKKRFCKRIEFGTA
+GLRGKMCAGFNCMNNLIVQQASQGLALAVEELVQNAHEKGVVIGYDGRYHSKEFAAITAK
+VFISKGFKTYLFSTLCPTPWTAFAVGYLKTACGVMVTASHNPKADNGYKVYWENGCQIIE
+PIDANIASKIHSNLEPWDLSNVDISKVIDPLADVSAEYYKQMMLTIPHFECPEQPKVKYV
+YTAMHGVGSKYVQDAFKTAKLPQPILVPLQNEPDPEFPTVPFPNPEEGKGALKCSIEVAE
+ANGATVIIANDPDADRLSVAVKSGNGWRQFTGNEMANLIADWTYNKYIVSGDKTPAFMVR
+STVSSSFISKMGEVEGFDTYETLTGFKWIGNKAKEIVDTQHKKLLMAYEEAIGFVIGNMS
+YDKDGVRAAVCFAAMALEYAEQGFNLEDRLNMLYEKYGYFASNNKYYFCYDPKLMEKIFN
+KMRNNGQYYWKFGKYAVKSIRDLTVGIDTAQPDKKPLLPVSASTQMITYTFENGCKATLR
+GSGTEPKLKYYIELPGKKGVKAEDVIAELMDLSHELLQASLEPEKNGLIPPKAE
+>Ppo014092.000
+MSISPSVQELVGKWLQWDKNPQNIKEIKDLVAANNEAELKNRLATRIAFGTAGLRGPMRA
+GFSCMNDLTVIQASQGLCKYLQQMVSDIKTRGIVVGYDGRHHSKEFAEWTAATFLSQGIT
+VYLFTRLVPTPFVSYATPLLRCAAGIMITASHNPKDDNGYKVYWDNGCQINVPHDKGISD
+CIEQNLTPWDINKAELLKSELVKDPTETVASAYLKEIKAKCCFHHDENSQKIPVTYTAMH
+GVGSEWVARAFEVFGLAPYVPVAPQISADPEFPTVAFPNPEEGKGALKLSMEAADKAGST
+LILATDPDADRLAVAEKLPSGSWKIFTGNEIGALLAYWAWLKYKERNPKVDPSKCVVINS
+TVSSKLLKALADKEGLKYDETLTGFKWIGGQAAIRIKEGYTFIFGFEEAIGFLFGDVNLD
+KDGVRAAAVFAEMNIQLHKQGITVVQQLEKIYKLYGYFITRNRYFFCYDPAKMERIFNAI
+RNYNNSGTYPTSCGPFKIKNTRDLTTGYDDSQTDKKAILPVSKSTQMITFFFENGGVVTL
+RGSGTEPKLKYYTELSGSDPEKVKSTLDEMVQAIIDTCLKPVENQLQPPSDE
+>ADB0001102_3
+MSTTTSINKLAQDWLKWDKNPKTRAEIQELVEQNDVKELTARLENRIAFGTAGLRGPMKA
+GFSCMNDLTVIQASQGLCLYVIDTIPNAIKSGVVIGYDGRYNSKEFAKYTAATFLSKGYK
+VYLFSKVVPTPYVAFAVTDLKASIGVMITASHNPKDDNGYKVYWENGCQINTPHDKGIAK
+LIDLNLEPWEINVDQLLSGPLVEDPLDRIVSSYNTKIAQYSVASHVKFANEKIIYTAMHG
+VGGEYTKMAFEAFKLPPFIPVAQQYQPDPAFPTVTFPNPEEGKGALKLSIETAEANGSRL
+ILANDPDADRLAVAERLKDGTWKVFNGNEIGVLLADWAWQNARRSHPDTPAEKFFMINTA
+VSSAMLKTMAKKDGYRCEETLTGFKWVGNRAREVMDAEGLHFLFAYEEAIGFLYGDVSLD
+KDGVRCAAIFAELALSYYANGSSCEDHLESLYKRYGYHISRNRYFFCYEPPKMVAIFNKI
+RNNRNFPTKCGRFEIERVRDLTIDYDDGFPDKKARLPVSTSTQMITFYFKNGAIATLRGS
+GTEPKLKYYVEMIGQDKAHVQQELAELVQCIINEFLRPVENELTPPKDD
+>Carpum
+MTQSTCITSMVINNYLSIYIFIYTINDYLKRSLFVLCLVAKMSHHKVAITHPISSYNSII
+NELAQNWLRWDKNKETRKEIEQLVEQKNEKELYDCLAKRIAFGTADNEIMMLLTHTLHTG
+LRGQMKAGFSNMNDLTVIQASQGLCKYVKETIPEAQKKGVVVGYDCRHHSETFARLTAAT
+FASQGFTVYLYSKMVPTPFVAFGVTDLKACVGVMVTASHNPKEDNGYKVYWENGCQINSP
+HDKGISQQIELNLEPWTIDVNSLLEKVDDPLERVTKSYMDQISKYSVRGSVDMATENVVY
+TAMHGVGGVFVKDAFAAFGLAPYIPVPAQVGPDAEFPTVTLPNPEEGKGALKLSIETAEA
+NNSRLIVANDPDADRLAAAEKLKDGSWKVFNGNEIGVLFADWAWQNARRQHGGDSINPSE
+YFMVTTAVSSSMLRTMATKEGYGYDETLTGFKWVGNKARDLIDQGKKFLFAYEESIGYMY
+GEVSLDKDGVRGAAVFTEMALSCYARGTSCQEHLESLYVKYGYHLSKNRYYFCYDPSKMV
+SIFNRIRNNGEFPKTCGPFEITRIRDLTVDYDNGYEDKKARLPVSSSTQMITFYFKNGAI
+ATLRGSGTEPKLKYYVEMIGDDKEQVKATLDQVHDQVIQQFLRPTENQLSPPSDE
+>Cephalum
+MTTDIYQIAQNWLRWDRNPKTHKEISQLVQDKNESELKARLESRIAFGTAGLRGPMKAGF
+SCMNDLTVIQASQGLCMYVKQTLAPDAERKGIVVGYDGRYNSEVFAKLTAATFVSQGFKV
+HLFSRLVPTPFVAFAVPFLKACVGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGIAKQ
+IELNLEPWNVFYKEYFDRIERYTVRHNKQMAREKIVYSAMHGVGGEYTKRAFEVFALDPF
+IAVKEQFHPDPAFPTVTFPNPEEGKGALKLSIETAEANNNWAWKNGKPYYEKGLGSFPND
+QYFMINTAVSSAMLKTMAMKEGFTYEEVLTGFKWVGNAAQNLIEKGKHLLFAYEEAIGFM
+YGDVSLDKDGVRCAPIFAELAQHLYSKGSSCQDHLEELYKRYGYHISKNRYFFCYDPLKM
+EKIFNRIRNGGQYPTKCGDFEITRIRDLTTGYDTGYPPENKAQLPTSTSTQMITFYFKNG
+GIATLRGSGTEPKLKYYVEMIGDDKENVELILQSMVDQVINQFLRPIENELIPPKD
+>Violaceum
+MVINPFYPYYLYFCYSPGISYQGVKINKTKLEQSTLTTINQWLNGNYDEQTKKNIQNLLD
+QESYTELTDAFYRNLEFGTGGLRGIMGAGSNRINKYTIGTATQGLSNYLLKKYPGEKIKV
+AIAHDSRNNSDQFAKITADVFSANGIYVYFFKELRPTPELSFAIRELGCRSGVMLTASHN
+PKEYNGYKAYGADGGQFTAPDDRLVMDEVAKITSIDEVKFTRIDANIELIGEEIDQLYLD
+KITALSVSPEAISRQKDLKIVYSPIHGTGITLVPKALAQFGFDNVTIVEEQSKPDGNFPT
+VVYPNPEEKEAMTLALKKAQEIDADLVLATDPDADRVGIAVKNNNNEWILLNGNQTGSLL
+VHYVLTAWEEKGKIDGNQYIVKTVVTSNLIEAIAKAKKVDCYNTLTGFKWIGQLITSLQG
+KKTFVVGGEESYGYSVGELVRDKDAVISCAFIAEMTAYYKDKGSSLYNALIDMYVTHGLY
+KEELVSLTKKGKTGAEEIKAMMEKFRNNPPASLGGSKVSTLKDYELGTETDLNTGKISKL
+SLPKSDVLQFVTEDGSIVSARPSGTEPKIKFYCSVNATLSQASEFDKTDEKLGLKINALM
+EDLQK
+>Deminut
+MTDIYQIAQNWLKWDRNPKTHKEISTLVEKKDEAELRARLETRIAFGTAGLRGPMKAGFS
+CMNDLTVIQASQGLSLYVKKTLAGSESKGAVVGYDGRYNSEVFAKLTAATFASQGFKVYL
+FSKVVPTPYVAFAVPELGASVGVMVTASHNPKDDNGYKVYWDNGCQINTPHDKHISELIE
+SNLEPWNVCIYITLQINIDKLLSGVIDPLQVVTSSYMSKIEKYSVKHLPQPLKLATEQKI
+VYTAMHGVGAEYAKLAFEAFSLPPFIPVTQQVTPDPAFPTVAFPNPEEGKGALKLAIETA
+EANKSRIILANDPDADRLAVAEKQPEYVFLFYLISNNGTWKVFNGNEIGILFADWAWQNC
+RRVYPDVPADQFFMINTAVSSAMLKSMAKKDGYIHEETLTGFKWVGNKARELLDQNKRFL
+FAYEEAIGFMYGDVSLDKDGVRCAAIFAELALYQYANGSSCQRHLDSLYERYGYHISKNR
+YFFCYEPPKMVAIFNAIRNNKNYPTKCGEFEIERIRDLTDDYDNGYPDNKARLPISKSTQ
+MITFFFKNGAIATLRGSGTEPKLKYYVEMIGDNKSEVEAILAKVVTAVIDNFLRPVENQL
+TPPKDD
+>Ellipt
+MADLDKLVEDWMRWDKNTKTRDEVQKMVAQGDKKALAAALQNRIAFGTAGLRGPMKAGFA
+NMNDLTVIQASQGLCIYVSATIADAAKKGVVVGYDGRHNSLQFARLTAATFRSKGFKVYL
+FSTVVPTPYVAFSVPELGACVGVMVTASHNPKDDNGYKIDVEKLLKEDGVEDPLEKITAS
+YMSKVADYSIKSHPATKDIVMSDDKIVYTAMHGVGGEYTRRSFKAFSLPEFIPVVQQFHP
+DPEFPTVTFPNPEEGKGALKLAIETAEKNNSRLILANDPDADRLAVAERQPDGTWKVFNG
+NEIGVLFADWAWKNARARDPTTPASEFFMVNTAVSSAMLKTMAKTEGYTYEETLTGFKWV
+GNKAKEAIDKGGRFLFAYEEAIGFMYGDVSLDKDGVRTAPIFAQMALSLYAKGLSCVDHL
+EQLMKTYGYHISRNRYFFCYEPPKMVAIFDKIRNNGNFPKHCGPFEIVRVRDLTVDYDDA
+YEDKKARLPVSTSTQMITFYFKNGAIATLRGSGTEPKLKYYVEMIGDKSAKKEDVEKTLA
+EVVKQVIDNFLRPVENELTPPKDD
+>Lepto
+MASSERLQQLIQDWLKWDKNPTTLSEIQELVKKNDEKELRARLENRIAFGTAGMFLLGPM
+KAGFSCMNDLTVIQASQGLCIYVSDTIPNALNSGVVVGYDGRYNSKEFAKYTAATFLSKG
+YKVYLFSKVVPTPYVAFAVTELKAAIGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGI
+AKQIQLNLEPWNVCAFFLDINANELLSGSSVVDPLDTIVNSYNSKITSYSVGNSGVKLAN
+EKIVYTAMHGVGGEYTKLAFEAFKLPPFVPVPQQYTPDPAFPTVAFPNPEEGKGALKLSI
+ETAEANGSRLILANDPDADRLAVAERNTNGTWKVFNGNEIGVLLADWAWQNARRAHPDTP
+ANRYFMINTAVSSAMLKTMAKHEGYRCDETLTGFKWVGNQARKVIDEEKLNFLFAYEEAI
+GFMYGDVSLDKDGVRCAPIFAEMALSYYAQGHSCEDHLETLYKRYGYHISRNRYFFCYEP
+PKMVAIFDRIRNGRNFPTKCGRFEIERVRDLTVDYDDAYPDKKARLPVSTSTQMITFWFK
+NGGIATLRGSGTEPKLKYYVEMIGQDKQVVEKELAELVDAVIQQFLRPVENELTPPKDD
diff --git a/test/jalview/ext/htsjdk/pgmB.fasta.fai b/test/jalview/ext/htsjdk/pgmB.fasta.fai
new file mode 100644 (file)
index 0000000..40491c6
--- /dev/null
@@ -0,0 +1,15 @@
+DDB_G0280897   603     15      60      61
+PPL_06716      602     641     60      61
+DFA_03821      601     1266    60      61
+DLA_10096      739     1890    60      61
+DPU1265769     466     2654    60      61
+440792448      1302    3139    60      61
+ENY64621.1     594     4476    60      61
+Ppo014092.000  592     5095    60      61
+ADB0001102_3   589     5711    60      61
+Carpum 655     6318    60      61
+Cephalum       536     6994    60      61
+Violaceum      605     7550    60      61
+Deminut        606     8175    60      61
+Ellipt 564     8800    60      61
+Lepto  599     9381    60      61
diff --git a/test/jalview/ext/jmol/JmolCommandsTest.java b/test/jalview/ext/jmol/JmolCommandsTest.java
new file mode 100644 (file)
index 0000000..46fa241
--- /dev/null
@@ -0,0 +1,34 @@
+package jalview.ext.jmol;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.SequenceRenderer;
+import jalview.structure.StructureMappingcommandSet;
+import jalview.structure.StructureSelectionManager;
+
+import org.testng.annotations.Test;
+
+public class JmolCommandsTest
+{
+
+  @Test(groups = { "Functional" })
+  public void testGetColourBySequenceCommand_noFeatures()
+  {
+    SequenceI seq1 = new Sequence("seq1", "MHRSQTRALK");
+    SequenceI seq2 = new Sequence("seq2", "MRLEITQSGD");
+    AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2 });
+    AlignFrame af = new AlignFrame(al, 800, 500);
+    SequenceRenderer sr = new SequenceRenderer(af.getViewport());
+    SequenceI[][] seqs = new SequenceI[][] { { seq1 }, { seq2 } };
+    String[] files = new String[] { "seq1.pdb", "seq2.pdb" };
+    StructureSelectionManager ssm = new StructureSelectionManager();
+
+    // need some mappings!
+
+    StructureMappingcommandSet[] commands = JmolCommands
+            .getColourBySequenceCommand(ssm, files, seqs, sr, null, al);
+  }
+}
index 0a53f10..b39b2bd 100644 (file)
@@ -37,8 +37,8 @@ import jalview.io.FileLoader;
 import jalview.io.FormatAdapter;
 import jalview.structure.StructureSelectionManager;
 
-import java.util.LinkedHashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
@@ -136,7 +136,7 @@ public class AlignViewportTest
     AlignedCodonFrame acf1 = new AlignedCodonFrame();
     AlignedCodonFrame acf2 = new AlignedCodonFrame();
 
-    Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     mappings.add(acf1);
     mappings.add(acf2);
     af1.getViewport().getAlignment().setCodonFrames(mappings);
@@ -148,18 +148,18 @@ public class AlignViewportTest
      */
     StructureSelectionManager ssm = StructureSelectionManager
             .getStructureSelectionManager(Desktop.instance);
-    assertEquals(2, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
-    assertTrue(ssm.seqmappings.contains(acf2));
+    assertEquals(2, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
+    assertTrue(ssm.getSequenceMappings().contains(acf2));
 
     /*
      * Close the second view. Verify that mappings are not removed as the first
      * view still holds a reference to them.
      */
     af1.closeMenuItem_actionPerformed(false);
-    assertEquals(2, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
-    assertTrue(ssm.seqmappings.contains(acf2));
+    assertEquals(2, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
+    assertTrue(ssm.getSequenceMappings().contains(acf2));
   }
 
   /**
@@ -183,11 +183,11 @@ public class AlignViewportTest
     AlignedCodonFrame acf2 = new AlignedCodonFrame();
     AlignedCodonFrame acf3 = new AlignedCodonFrame();
 
-    Set<AlignedCodonFrame> mappings1 = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
     mappings1.add(acf1);
     af1.getViewport().getAlignment().setCodonFrames(mappings1);
 
-    Set<AlignedCodonFrame> mappings2 = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings2 = new ArrayList<AlignedCodonFrame>();
     mappings2.add(acf2);
     mappings2.add(acf3);
     af2.getViewport().getAlignment().setCodonFrames(mappings2);
@@ -196,7 +196,7 @@ public class AlignViewportTest
      * AlignFrame1 has mapping acf1, AlignFrame2 has acf2 and acf3
      */
 
-    Set<AlignedCodonFrame> ssmMappings = ssm.seqmappings;
+    List<AlignedCodonFrame> ssmMappings = ssm.getSequenceMappings();
     assertEquals(0, ssmMappings.size());
     ssm.registerMapping(acf1);
     assertEquals(1, ssmMappings.size());
@@ -236,12 +236,12 @@ public class AlignViewportTest
     AlignedCodonFrame acf2 = new AlignedCodonFrame();
     AlignedCodonFrame acf3 = new AlignedCodonFrame();
 
-    Set<AlignedCodonFrame> mappings1 = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings1 = new ArrayList<AlignedCodonFrame>();
     mappings1.add(acf1);
     mappings1.add(acf2);
     af1.getViewport().getAlignment().setCodonFrames(mappings1);
 
-    Set<AlignedCodonFrame> mappings2 = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings2 = new ArrayList<AlignedCodonFrame>();
     mappings2.add(acf2);
     mappings2.add(acf3);
     af2.getViewport().getAlignment().setCodonFrames(mappings2);
@@ -250,7 +250,7 @@ public class AlignViewportTest
      * AlignFrame1 has mappings acf1 and acf2, AlignFrame2 has acf2 and acf3
      */
 
-    Set<AlignedCodonFrame> ssmMappings = ssm.seqmappings;
+    List<AlignedCodonFrame> ssmMappings = ssm.getSequenceMappings();
     assertEquals(0, ssmMappings.size());
     ssm.registerMapping(acf1);
     assertEquals(1, ssmMappings.size());
index 4b10ab8..c472576 100644 (file)
@@ -34,7 +34,6 @@ import jalview.gui.AlignFrame;
 
 import java.io.File;
 
-import org.junit.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
@@ -81,8 +80,8 @@ public class AnnotatedPDBFileInputTest
     {
       for (int q = p + 1; q < avec.length; q++)
       {
-        Assert.assertNotEquals("Found a duplicate annotation row "
-                + avec[p].label, avec[p], avec[q]);
+        assertTrue("Found a duplicate annotation row "
+                + avec[p].label, avec[p] != avec[q]);
       }
     }
   }
index d757a6a..625244d 100644 (file)
@@ -70,7 +70,7 @@ public class AnnotationFileIOTest
       FormatAdapter rf = new FormatAdapter();
 
       AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
-              new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
+              new IdentifyFile().identify(ff, AppletFormatAdapter.FILE));
 
       // make sure dataset is initialised ? not sure about this
       for (int i = 0; i < al.getSequencesArray().length; ++i)
index 520d1bb..7112c77 100644 (file)
 package jalview.io;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SequenceDummy;
 import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
 import jalview.gui.AlignFrame;
+import jalview.schemes.AnnotationColourGradient;
+import jalview.schemes.GraduatedColor;
 
 import java.awt.Color;
 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 
 import org.testng.annotations.Test;
 
 public class FeaturesFileTest
 {
 
-  static String TestFiles[][] = { { "Test example features import/export",
-      "examples/uniref50.fa", "examples/exampleFeatures.txt" } };
+  private static String simpleGffFile = "examples/testdata/simpleGff3.gff";
 
   @Test(groups = { "Functional" })
   public void testParse() throws Exception
   {
-    testFeaturesFileIO("Features file test");
-  }
-
-  public static AlignmentI readAlignmentFile(File f) throws IOException
-  {
-    System.out.println("Reading file: " + f);
-    String ff = f.getPath();
-    FormatAdapter rf = new FormatAdapter();
-
-    AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
-            new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
-
-    al.setDataset(null); // creates dataset sequences
-    assertNotNull("Couldn't read supplied alignment data.", al);
-    return al;
-  }
-
-  /**
-   * Helper method for testing
-   * 
-   * @param testname
-   * @param f
-   *          alignment file
-   * @param featFile
-   *          features file to load on to the alignment
-   * @throws IOException
-   */
-  public static void testFeaturesFileIO(String testname) throws IOException
-  {
     File f = new File("examples/uniref50.fa");
     AlignmentI al = readAlignmentFile(f);
     AlignFrame af = new AlignFrame(al, 500, 500);
@@ -80,7 +63,8 @@ public class FeaturesFileTest
             .getFeatureColours();
     FeaturesFile featuresFile = new FeaturesFile(
             "examples/exampleFeatures.txt", FormatAdapter.FILE);
-    assertTrue("Test " + testname + "\nFailed to parse features file.",
+    assertTrue("Test " + "Features file test"
+            + "\nFailed to parse features file.",
             featuresFile.parse(al.getDataset(), colours, true));
 
     /*
@@ -149,4 +133,288 @@ public class FeaturesFileTest
     assertEquals("netphos", sf.featureGroup);
     assertEquals("PHOSPHORYLATION (T)", sf.type);
   }
+
+  /**
+   * Test parsing a features file with a mix of Jalview and GFF formatted
+   * content
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParse_mixedJalviewGff() throws Exception
+  {
+    File f = new File("examples/uniref50.fa");
+    AlignmentI al = readAlignmentFile(f);
+    AlignFrame af = new AlignFrame(al, 500, 500);
+    Map<String, Object> colours = af.getFeatureRenderer()
+            .getFeatureColours();
+    // GFF2 uses space as name/value separator in column 9
+    String gffData = "METAL\tcc9900\n" + "GFF\n"
+            + "FER_CAPAA\tuniprot\tMETAL\t44\t45\t4.0\t.\t.\tNote Iron-sulfur; Note 2Fe-2S\n"
+            + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t2.0\t.\t.";
+    FeaturesFile featuresFile = new FeaturesFile(gffData,
+            FormatAdapter.PASTE);
+    assertTrue("Failed to parse features file",
+            featuresFile.parse(al.getDataset(), colours, true));
+
+    // verify colours read or synthesized
+    colours = af.getFeatureRenderer().getFeatureColours();
+    assertEquals("1 feature group colours not found", 1, colours.size());
+    assertEquals(colours.get("METAL"), new Color(0xcc9900));
+
+    // verify feature on FER_CAPAA
+    SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence()
+            .getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    SequenceFeature sf = sfs[0];
+    assertEquals("Iron-sulfur; 2Fe-2S", sf.description);
+    assertEquals(44, sf.begin);
+    assertEquals(45, sf.end);
+    assertEquals("uniprot", sf.featureGroup);
+    assertEquals("METAL", sf.type);
+    assertEquals(4f, sf.getScore(), 0.001f);
+
+    // verify feature on FER1_SOLLC
+    sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    sf = sfs[0];
+    assertEquals("uniprot", sf.description);
+    assertEquals(55, sf.begin);
+    assertEquals(130, sf.end);
+    assertEquals("uniprot", sf.featureGroup);
+    assertEquals("Pfam", sf.type);
+    assertEquals(2f, sf.getScore(), 0.001f);
+  }
+
+  public static AlignmentI readAlignmentFile(File f) throws IOException
+  {
+    System.out.println("Reading file: " + f);
+    String ff = f.getPath();
+    FormatAdapter rf = new FormatAdapter();
+
+    AlignmentI al = rf.readFile(ff, FormatAdapter.FILE,
+            new IdentifyFile().identify(ff, FormatAdapter.FILE));
+
+    al.setDataset(null); // creates dataset sequences
+    assertNotNull("Couldn't read supplied alignment data.", al);
+    return al;
+  }
+
+  /**
+   * Test various ways of describing a feature colour scheme
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParseGraduatedColourScheme() throws Exception
+  {
+    FeaturesFile ff = new FeaturesFile();
+
+    // colour by label:
+    GraduatedColor gc = ff.parseGraduatedColourScheme(
+            "BETA-TURN-IR\t9a6a94", "label");
+    assertTrue(gc.isColourByLabel());
+    assertEquals(Color.white, gc.getMinColor());
+    assertEquals(Color.black, gc.getMaxColor());
+    assertTrue(gc.isAutoScale());
+
+    // using colour name, rgb, etc:
+    String spec = "blue|255,0,255|absolute|20.0|95.0|below|66.0";
+    gc = ff.parseGraduatedColourScheme("BETA-TURN-IR\t" + spec, spec);
+    assertFalse(gc.isColourByLabel());
+    assertEquals(Color.blue, gc.getMinColor());
+    assertEquals(new Color(255, 0, 255), gc.getMaxColor());
+    assertFalse(gc.isAutoScale());
+    assertFalse(gc.getTolow());
+    assertEquals(20.0f, gc.getMin(), 0.001f);
+    assertEquals(95.0f, gc.getMax(), 0.001f);
+    assertEquals(AnnotationColourGradient.BELOW_THRESHOLD,
+            gc.getThreshType());
+    assertEquals(66.0f, gc.getThresh(), 0.001f);
+
+    // inverse gradient high to low:
+    spec = "blue|255,0,255|95.0|20.0|below|66.0";
+    gc = ff.parseGraduatedColourScheme("BETA-TURN-IR\t" + spec, spec);
+    assertTrue(gc.isAutoScale());
+    assertTrue(gc.getTolow());
+  }
+
+  /**
+   * Test parsing a features file with GFF formatted content only
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParse_pureGff3() throws Exception
+  {
+    File f = new File("examples/uniref50.fa");
+    AlignmentI al = readAlignmentFile(f);
+    AlignFrame af = new AlignFrame(al, 500, 500);
+    Map<String, Object> colours = af.getFeatureRenderer()
+            .getFeatureColours();
+    // GFF3 uses '=' separator for name/value pairs in column 9
+    String gffData = "##gff-version 3\n"
+            + "FER_CAPAA\tuniprot\tMETAL\t39\t39\t0.0\t.\t.\t"
+            + "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465\n"
+            + "FER1_SOLLC\tuniprot\tPfam\t55\t130\t3.0\t.\t.\tID=$23";
+    FeaturesFile featuresFile = new FeaturesFile(gffData,
+            FormatAdapter.PASTE);
+    assertTrue("Failed to parse features file",
+            featuresFile.parse(al.getDataset(), colours, true));
+
+    // verify feature on FER_CAPAA
+    SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence()
+            .getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    SequenceFeature sf = sfs[0];
+    // description parsed from Note attribute
+    assertEquals("Iron-sulfur (2Fe-2S); another note", sf.description);
+    assertEquals(39, sf.begin);
+    assertEquals(39, sf.end);
+    assertEquals("uniprot", sf.featureGroup);
+    assertEquals("METAL", sf.type);
+    assertEquals(
+            "Note=Iron-sulfur (2Fe-2S);Note=another note;evidence=ECO:0000255|PROSITE-ProRule:PRU00465",
+            sf.getValue("ATTRIBUTES"));
+
+    // verify feature on FER1_SOLLC
+    sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    sf = sfs[0];
+    assertEquals("uniprot", sf.description);
+    assertEquals(55, sf.begin);
+    assertEquals(130, sf.end);
+    assertEquals("uniprot", sf.featureGroup);
+    assertEquals("Pfam", sf.type);
+    assertEquals(3f, sf.getScore(), 0.001f);
+  }
+
+  /**
+   * Test parsing a features file with Jalview format features (but no colour
+   * descriptors or startgroup to give the hint not to parse as GFF)
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParse_jalviewFeaturesOnly() throws Exception
+  {
+    File f = new File("examples/uniref50.fa");
+    AlignmentI al = readAlignmentFile(f);
+    AlignFrame af = new AlignFrame(al, 500, 500);
+    Map<String, Object> colours = af.getFeatureRenderer()
+            .getFeatureColours();
+
+    /*
+     * one feature on FER_CAPAA and one on sequence 3 (index 2) FER1_SOLLC
+     */
+    String featureData = "Iron-sulfur (2Fe-2S)\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
+            + "Iron-phosphorus (2Fe-P)\tID_NOT_SPECIFIED\t2\t86\t87\tMETALLIC\n";
+    FeaturesFile featuresFile = new FeaturesFile(featureData,
+            FormatAdapter.PASTE);
+    assertTrue("Failed to parse features file",
+            featuresFile.parse(al.getDataset(), colours, true));
+
+    // verify FER_CAPAA feature
+    SequenceFeature[] sfs = al.getSequenceAt(0).getDatasetSequence()
+            .getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    SequenceFeature sf = sfs[0];
+    assertEquals("Iron-sulfur (2Fe-2S)", sf.description);
+    assertEquals(39, sf.begin);
+    assertEquals(39, sf.end);
+    assertEquals("METAL", sf.type);
+
+    // verify FER1_SOLLC feature
+    sfs = al.getSequenceAt(2).getDatasetSequence().getSequenceFeatures();
+    assertEquals(1, sfs.length);
+    sf = sfs[0];
+    assertEquals("Iron-phosphorus (2Fe-P)", sf.description);
+    assertEquals(86, sf.begin);
+    assertEquals(87, sf.end);
+    assertEquals("METALLIC", sf.type);
+  }
+
+  private void checkDatasetfromSimpleGff3(AlignmentI dataset)
+  {
+    assertEquals("no sequences extracted from GFF3 file", 2,
+            dataset.getHeight());
+  
+    SequenceI seq1 = dataset.findName("seq1");
+    SequenceI seq2 = dataset.findName("seq2");
+    assertNotNull(seq1);
+    assertNotNull(seq2);
+    assertFalse(
+            "Failed to replace dummy seq1 with real sequence",
+            seq1 instanceof SequenceDummy
+                    && ((SequenceDummy) seq1).isDummy());
+    assertFalse(
+            "Failed to replace dummy seq2 with real sequence",
+            seq2 instanceof SequenceDummy
+                    && ((SequenceDummy) seq2).isDummy());
+    String placeholderseq = new SequenceDummy("foo").getSequenceAsString();
+    assertFalse("dummy replacement buggy for seq1",
+            placeholderseq.equals(seq1.getSequenceAsString()));
+    assertFalse("dummy replacement buggy for seq2",
+            placeholderseq.equals(seq2.getSequenceAsString()));
+    assertNotNull("No features added to seq1", seq1.getSequenceFeatures());
+    assertEquals("Wrong number of features", 3,
+            seq1.getSequenceFeatures().length);
+    assertNull(seq2.getSequenceFeatures());
+    assertEquals(
+            "Wrong number of features",
+            0,
+            seq2.getSequenceFeatures() == null ? 0 : seq2
+                    .getSequenceFeatures().length);
+    assertTrue(
+            "Expected at least one CDNA/Protein mapping for seq1",
+            dataset.getCodonFrame(seq1) != null
+                    && dataset.getCodonFrame(seq1).size() > 0);
+  
+  }
+
+  @Test(groups = { "Functional" })
+  public void readGff3File() throws IOException
+  {
+    FeaturesFile gffreader = new FeaturesFile(true, simpleGffFile,
+            FormatAdapter.FILE);
+    Alignment dataset = new Alignment(gffreader.getSeqsAsArray());
+    gffreader.addProperties(dataset);
+    checkDatasetfromSimpleGff3(dataset);
+  }
+
+  @Test(groups = { "Functional" })
+  public void simpleGff3FileClass() throws IOException
+  {
+    AlignmentI dataset = new Alignment(new SequenceI[] {});
+    FeaturesFile ffile = new FeaturesFile(simpleGffFile,
+            FormatAdapter.FILE);
+  
+    boolean parseResult = ffile.parse(dataset, null, false, false);
+    assertTrue("return result should be true", parseResult);
+    checkDatasetfromSimpleGff3(dataset);
+  }
+
+  @Test(groups = { "Functional" })
+  public void simpleGff3FileLoader() throws IOException
+  {
+    AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded(
+            simpleGffFile, FormatAdapter.FILE);
+    assertTrue(
+            "Didn't read the alignment into an alignframe from Gff3 File",
+            af != null);
+    checkDatasetfromSimpleGff3(af.getViewport().getAlignment());
+  }
+
+  @Test(groups = { "Functional" })
+  public void simpleGff3RelaxedIdMatching() throws IOException
+  {
+    AlignmentI dataset = new Alignment(new SequenceI[] {});
+    FeaturesFile ffile = new FeaturesFile(simpleGffFile,
+            FormatAdapter.FILE);
+  
+    boolean parseResult = ffile.parse(dataset, null, false, true);
+    assertTrue("return result (relaxedID matching) should be true",
+            parseResult);
+    checkDatasetfromSimpleGff3(dataset);
+  }
 }
index fef7173..cde1cbc 100644 (file)
@@ -69,7 +69,7 @@ public class FileIOTester
   {
     AssertJUnit.assertTrue("Couldn't resolve " + src + " as a valid file",
             fp.isValid());
-    String type = new IdentifyFile().Identify(fp);
+    String type = new IdentifyFile().identify(fp);
     AssertJUnit.assertTrue("Data from '" + src + "' Expected to be '" + fmt
             + "' identified as '" + type + "'", type.equalsIgnoreCase(fmt));
   }
diff --git a/test/jalview/io/Gff3tests.java b/test/jalview/io/Gff3tests.java
deleted file mode 100644 (file)
index b78a004..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- * 
- * This file is part of Jalview.
- * 
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *  
- * Jalview is distributed in the hope that it will be useful, but 
- * WITHOUT ANY WARRANTY; without even the implied warranty 
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
- * PURPOSE.  See the GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.io;
-
-import static org.testng.AssertJUnit.assertEquals;
-import static org.testng.AssertJUnit.assertFalse;
-import static org.testng.AssertJUnit.assertNotNull;
-import static org.testng.AssertJUnit.assertNull;
-import static org.testng.AssertJUnit.assertTrue;
-
-import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.SequenceDummy;
-import jalview.datamodel.SequenceI;
-import jalview.gui.AlignFrame;
-
-import java.io.IOException;
-
-import org.testng.annotations.Test;
-
-public class Gff3tests
-{
-
-  private static String exonerateSeqs = "examples/testdata/exonerateseqs.fa",
-          exonerateOutput = "examples/testdata/exonerateoutput.gff",
-          simpleGff3file = "examples/testdata/simpleGff3.gff";
-
-  @Test(groups = { "Functional" })
-  public void testExonerateImport()
-  {
-    // exonerate does not tag sequences after features, so we have a more
-    // conventional annotation import test here
-
-    FileLoader loader = new FileLoader(false);
-
-    AlignFrame af = loader.LoadFileWaitTillLoaded(exonerateSeqs,
-            FormatAdapter.FILE);
-
-    assertEquals("Unexpected number of DNA protein associations", 0, af
-            .getViewport().getAlignment().getCodonFrames().size());
-
-    af.loadJalviewDataFile(exonerateOutput, FormatAdapter.FILE, null, null);
-
-    assertTrue("Expected at least one DNA protein association", af
-            .getViewport().getAlignment().getDataset().getCodonFrames()
-            .size() > 0);
-
-  }
-
-  @Test(groups = { "Functional" })
-  public void simpleGff3FileIdentify()
-  {
-    assertEquals("Didn't recognise file correctly.", IdentifyFile.GFF3File,
-            new IdentifyFile().Identify(simpleGff3file, FormatAdapter.FILE));
-  }
-
-  @Test(groups = { "Functional" })
-  public void simpleGff3FileClass() throws IOException
-  {
-    AlignmentI dataset = new Alignment(new SequenceI[] {});
-    FeaturesFile ffile = new FeaturesFile(simpleGff3file,
-            FormatAdapter.FILE);
-
-    boolean parseResult = ffile.parse(dataset, null, null, false, false);
-    assertTrue("return result should be true", parseResult);
-    checkDatasetfromSimpleGff3(dataset);
-  }
-
-  @Test(groups = { "Functional" })
-  public void simpleGff3FileLoader() throws IOException
-  {
-    AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded(
-            simpleGff3file, FormatAdapter.FILE);
-    assertTrue(
-            "Didn't read the alignment into an alignframe from Gff3 File",
-            af != null);
-    checkDatasetfromSimpleGff3(af.getViewport().getAlignment().getDataset());
-  }
-
-  @Test(groups = { "Functional" })
-  public void simpleGff3RelaxedIdMatching() throws IOException
-  {
-    AlignmentI dataset = new Alignment(new SequenceI[] {});
-    FeaturesFile ffile = new FeaturesFile(simpleGff3file,
-            FormatAdapter.FILE);
-
-    boolean parseResult = ffile.parse(dataset, null, null, false, true);
-    assertTrue("return result (relaxedID matching) should be true",
-            parseResult);
-    checkDatasetfromSimpleGff3(dataset);
-  }
-
-  @Test(groups = { "Functional" })
-  public void readGff3File() throws IOException
-  {
-    Gff3File gff3reader = new Gff3File(simpleGff3file, FormatAdapter.FILE);
-    Alignment dataset = new Alignment(gff3reader.getSeqsAsArray());
-    gff3reader.addProperties(dataset);
-    checkDatasetfromSimpleGff3(dataset);
-
-  }
-
-  private void checkDatasetfromSimpleGff3(AlignmentI dataset)
-  {
-    assertEquals("no sequences extracted from GFF3 file", 2,
-            dataset.getHeight());
-
-    SequenceI seq1 = dataset.findName("seq1"), seq2 = dataset
-            .findName("seq2");
-    assertNotNull(seq1);
-    assertNotNull(seq2);
-    assertFalse(
-            "Failed to replace dummy seq1 with real sequence",
-            seq1 instanceof SequenceDummy
-                    && ((SequenceDummy) seq1).isDummy());
-    assertFalse(
-            "Failed to replace dummy seq2 with real sequence",
-            seq2 instanceof SequenceDummy
-                    && ((SequenceDummy) seq2).isDummy());
-    String placeholderseq = new SequenceDummy("foo").getSequenceAsString();
-    assertFalse("dummy replacement buggy for seq1",
-            placeholderseq.equals(seq1.getSequenceAsString()));
-    assertFalse("dummy replacement buggy for seq2",
-            placeholderseq.equals(seq2.getSequenceAsString()));
-    assertNotNull("No features added to seq1", seq1.getSequenceFeatures());// !=
-                                                                           // null);
-    assertEquals("Wrong number of features", 3,
-            seq1.getSequenceFeatures().length);
-    assertNull(seq2.getSequenceFeatures());
-    assertEquals(
-            "Wrong number of features",
-            0,
-            seq2.getSequenceFeatures() == null ? 0 : seq2
-                    .getSequenceFeatures().length);
-    assertTrue(
-            "Expected at least one CDNA/Protein mapping for seq1",
-            dataset.getCodonFrame(seq1) != null
-                    && dataset.getCodonFrame(seq1).size() > 0);
-
-  }
-  // @Test(groups ={ "Functional" })
-  // public final void testPrintGFFFormatSequenceIArrayMapOfStringObject()
-  // {
-  // fail("Not yet implemented");
-  // }
-  //
-  // @Test(groups ={ "Functional" })
-  // public final void testAlignFileBooleanStringString()
-  // {
-  // fail("Not yet implemented");
-  // }
-
-}
index 3b99208..6c13e26 100644 (file)
@@ -20,8 +20,6 @@
  */
 package jalview.io;
 
-import static org.junit.Assert.fail;
-
 import org.testng.annotations.Test;
 
 public class HtmlFileTest
@@ -30,7 +28,7 @@ public class HtmlFileTest
   @Test(groups = { "Functional" }, enabled = false)
   public void test()
   {
-    fail("Not yet implemented");
+    org.testng.AssertJUnit.fail("Not yet implemented");
   }
 
 }
index c958ff0..60f2e48 100644 (file)
@@ -20,6 +20,9 @@
  */
 package jalview.io;
 
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -28,15 +31,37 @@ public class IdentifyFileTest
 {
 
   @Test(groups = { "Functional" }, dataProvider = "identifyFiles")
-  public void Identify(String data, String expectedFileType)
+  public void testIdentify(String data, String expectedFileType)
   {
     String protocol = AppletFormatAdapter.FILE;
     IdentifyFile ider = new IdentifyFile();
-    String actualFiletype = ider.Identify(data, protocol);
+    String actualFiletype = ider.identify(data, protocol);
     Assert.assertEquals(actualFiletype, expectedFileType,
             "File identification Failed!");
   }
 
+  /**
+   * Additional tests for (a) Jalview features file with no colour
+   * specifications (old style 'groups' file) and (b) Jalview features file with
+   * embedded GFF
+   */
+  @Test(groups = "Functional")
+  public void testIdentify_featureFile()
+  {
+    IdentifyFile ider = new IdentifyFile();
+
+    // Jalview format with features only, no feature colours
+    String data = "Iron-sulfur (2Fe-2S)\tFER_CAPAA\t-1\t39\t39\tMETAL\n"
+            + "Iron-phosphorus (2Fe-P)\tID_NOT_SPECIFIED\t2\t86\t87\tMETALLIC\n";
+    Assert.assertEquals(IdentifyFile.FeaturesFile, ider.identify(data, AppletFormatAdapter.PASTE));
+
+    // Jalview feature colour followed by GFF format feature data
+    data = "METAL\tcc9900\n" + "GFF\n"
+            + "FER_CAPAA\tuniprot\tMETAL\t44\t45\t4.0\t.\t.\n";
+    Assert.assertEquals(IdentifyFile.FeaturesFile,
+            ider.identify(data, AppletFormatAdapter.PASTE));
+  }
+
   @DataProvider(name = "identifyFiles")
   public Object[][] IdentifyFileDP()
   {
@@ -54,7 +79,8 @@ public class IdentifyFileTest
         { "examples/testdata/test.html", "HTML" },
         { "examples/testdata/test.pileup", "PileUp" },
         { "examples/testdata/test.blc", "BLC" },
-        { "examples/testdata/simplegff3.gff", "GFF v2 or v3" },
+        { "examples/exampleFeatures.txt", IdentifyFile.FeaturesFile },
+        { "examples/testdata/simpleGff3.gff", IdentifyFile.FeaturesFile },
         { "examples/testdata/test.jvp", "Jalview" },
         {
             "examples/testdata/cullpdb_pc25_res3.0_R0.3_d150729_chains9361.fasta.15316",
@@ -65,4 +91,22 @@ public class IdentifyFileTest
     };
   }
 
+  @Test(groups = "Functional")
+  public void testLooksLikeFeatureData()
+  {
+    IdentifyFile id = new IdentifyFile();
+    assertFalse(id.looksLikeFeatureData(null));
+    assertFalse(id.looksLikeFeatureData(""));
+    // too few columns:
+    assertFalse(id.looksLikeFeatureData("1 \t 2 \t 3 \t 4 \t 5"));
+    // GFF format:
+    assertTrue(id
+            .looksLikeFeatureData("Seq1\tlocal\tHelix\t2456\t2462\tss"));
+    // Jalview format:
+    assertTrue(id.looksLikeFeatureData("Helix\tSeq1\t-1\t2456\t2462\tss"));
+    // non-numeric start column:
+    assertFalse(id.looksLikeFeatureData("Helix\tSeq1\t-1\t.\t2462\tss"));
+    // non-numeric end column:
+    assertFalse(id.looksLikeFeatureData("Helix\tSeq1\t-1\t2456\t.\tss"));
+  }
 }
index e889837..d7a9166 100644 (file)
@@ -54,7 +54,7 @@ public class StockholmFileTest
   {
     AppletFormatAdapter af = new AppletFormatAdapter();
     AlignmentI al = af.readFile(PfamFile, af.FILE,
-            new IdentifyFile().Identify(PfamFile, af.FILE));
+            new IdentifyFile().identify(PfamFile, af.FILE));
     int numpdb = 0;
     for (SequenceI sq : al.getSequences())
     {
@@ -95,7 +95,7 @@ public class StockholmFileTest
       AppletFormatAdapter rf = new AppletFormatAdapter();
 
       AlignmentI al = rf.readFile(ff, AppletFormatAdapter.FILE,
-              new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
+              new IdentifyFile().identify(ff, AppletFormatAdapter.FILE));
 
       assertNotNull("Couldn't read supplied alignment data.", al);
 
@@ -112,7 +112,7 @@ public class StockholmFileTest
               AppletFormatAdapter.PASTE, ioformat);
       assertNotNull("Couldn't parse reimported alignment data.", al_input);
 
-      String identifyoutput = new IdentifyFile().Identify(outputfile,
+      String identifyoutput = new IdentifyFile().identify(outputfile,
               AppletFormatAdapter.PASTE);
       assertNotNull("Identify routine failed for outputformat " + ioformat,
               identifyoutput);
diff --git a/test/jalview/io/gff/ExonerateHelperTest.java b/test/jalview/io/gff/ExonerateHelperTest.java
new file mode 100644 (file)
index 0000000..54d6eb2
--- /dev/null
@@ -0,0 +1,295 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.MappingType;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.io.FileLoader;
+import jalview.io.FormatAdapter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.testng.annotations.Test;
+
+public class ExonerateHelperTest
+{
+  @Test(groups = "Functional")
+  public void testGetMappingType()
+  {
+    // protein-to-dna:
+    assertSame(MappingType.PeptideToNucleotide,
+            ExonerateHelper
+                    .getMappingType("exonerate:protein2genome:local"));
+    assertSame(MappingType.PeptideToNucleotide,
+            ExonerateHelper.getMappingType("exonerate:protein2dna:local"));
+
+    // dna-to-dna:
+    assertSame(MappingType.NucleotideToNucleotide,
+            ExonerateHelper.getMappingType("coding2coding"));
+    assertSame(MappingType.NucleotideToNucleotide,
+            ExonerateHelper.getMappingType("coding2genome"));
+    assertSame(MappingType.NucleotideToNucleotide,
+            ExonerateHelper.getMappingType("cdna2genome"));
+    assertSame(MappingType.NucleotideToNucleotide,
+            ExonerateHelper.getMappingType("genome2genome"));
+    assertNull(ExonerateHelper.getMappingType("affine:local"));
+  }
+
+  /**
+   * Test processing one exonerate GFF line for the case where the mapping is
+   * protein2dna, similarity feature is on the query (the protein), match to the
+   * forward strand, target sequence is in neither the alignment nor the 'new
+   * sequences'
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessGffSimilarity_protein2dna_forward_querygff()
+          throws IOException
+  {
+    ExonerateHelper testee = new ExonerateHelper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t.\t+\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
+            .split("\\t");
+    SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+    Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
+
+    /*
+     * this should create a mapping from Seq/3-10 to virtual sequence
+     * dna1 (added to newseqs) positions 400-423
+     */
+    testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("dna1", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Test processing one exonerate GFF line for the case where the mapping is
+   * protein2dna, similarity feature is on the query (the protein), match to the
+   * reverse strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessGffSimilarity_protein2dna_reverse_querygff()
+          throws IOException
+  {
+    ExonerateHelper testee = new ExonerateHelper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "Seq\texonerate:protein2dna:local\tsimilarity\t3\t10\t0\t-\t.\talignment_id 0 ; Target dna1 ; Align 3 400 8"
+            .split("\\t");
+    SequenceI seq = new Sequence("Seq", "PQRASTGKEEDVMIWCHQN");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+    Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
+
+    /*
+     * this should create a mapping from Seq/3-10 to virtual sequence
+     * dna1 (added to newseqs) positions 400-377 (reverse)
+     */
+    testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("dna1", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(seq.getDatasetSequence(), mapping.getAaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Test processing one exonerate GFF line for the case where the mapping is
+   * protein2dna, similarity feature is on the target (the dna), match to the
+   * forward strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessGffSimilarity_protein2dna_forward_targetgff()
+          throws IOException
+  {
+    ExonerateHelper testee = new ExonerateHelper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t400\t423\t0\t+\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
+            .split("\\t");
+    SequenceI seq = new Sequence("dna1/391-430",
+            "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] { seq });
+    // GFF feature on the target describes mapping from base 400 for
+    // count 24 to position 3
+    Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
+
+    /*
+     * this should create a mapping from dna1 positions 400-423 to
+     * virtual sequence Prot1 (added to newseqs) positions 3-10
+     */
+    testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("Prot1", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
+    assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 400, 423 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Test processing one exonerate GFF line for the case where the mapping is
+   * protein2dna, similarity feature is on the target (the dna), match to the
+   * reverse strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessGffSimilarity_protein2dna_reverse_targetgff()
+          throws IOException
+  {
+    ExonerateHelper testee = new ExonerateHelper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "dna1\texonerate:protein2dna:local\tsimilarity\t377\t400\t0\t-\t.\talignment_id 0 ; Query Prot1 ; Align 400 3 24"
+            .split("\\t");
+    SequenceI seq = new Sequence("dna1/371-410",
+            "CGATCCGATCCGATCCGATCCGATCCGATCCGATCCGATC");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] { seq });
+    // GFF feature on the target describes mapping from base 400 for
+    // count 24 to position 3
+    Map<String, List<String>> set = Gff2Helper.parseNameValuePairs(gff[8]);
+
+    /*
+     * this should create a mapping from dna1 positions 400-377 (reverse)
+     * to virtual sequence Prot1 (added to newseqs) positions 3-10
+     */
+    testee.processGffSimilarity(set, seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("Prot1", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
+    assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 400, 377 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 3, 10 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Tests loading exonerate GFF2 output, including 'similarity' alignment
+   * feature, on to sequences
+   */
+  @Test(groups = { "Functional" })
+  public void testAddExonerateGffToAlignment()
+  {
+    FileLoader loader = new FileLoader(false);
+    AlignFrame af = loader.LoadFileWaitTillLoaded(
+            "examples/testdata/exonerateseqs.fa",
+            FormatAdapter.FILE);
+  
+    af.loadJalviewDataFile("examples/testdata/exonerateoutput.gff",
+            FormatAdapter.FILE, null, null);
+  
+    /*
+     * verify one mapping to a dummy sequence, one to a real one
+     */
+    List<AlignedCodonFrame> mappings = af
+            .getViewport().getAlignment().getDataset().getCodonFrames();
+    assertEquals(2, mappings.size());
+    Iterator<AlignedCodonFrame> iter = mappings.iterator();
+  
+    // first mapping is to dummy sequence
+    AlignedCodonFrame mapping = iter.next();
+    Mapping[] mapList = mapping.getProtMappings();
+    assertEquals(1, mapList.length);
+    assertTrue(mapList[0].getTo() instanceof SequenceDummy);
+    assertEquals("DDB_G0269124", mapList[0].getTo().getName());
+
+    // 143 in protein should map to codon [11270, 11269, 11268] in dna
+    int[] mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
+    assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
+  
+    // second mapping is to a sequence in the alignment
+    mapping = iter.next();
+    mapList = mapping.getProtMappings();
+    assertEquals(1, mapList.length);
+    SequenceI proteinSeq = af.getViewport().getAlignment()
+            .findName("DDB_G0280897");
+    assertSame(proteinSeq.getDatasetSequence(), mapList[0].getTo());
+    assertEquals(1, mapping.getdnaToProt().length);
+  
+    // 143 in protein should map to codon [11270, 11269, 11268] in dna
+    mappedRegion = mapList[0].getMap().locateInFrom(143, 143);
+    assertArrayEquals(new int[] { 11270, 11268 }, mappedRegion);
+  
+    // 182 in protein should map to codon [11153, 11152, 11151] in dna
+    mappedRegion = mapList[0].getMap().locateInFrom(182, 182);
+    assertArrayEquals(new int[] { 11153, 11151 }, mappedRegion);
+  
+    // and the reverse mapping:
+    mappedRegion = mapList[0].getMap().locateInTo(11151, 11153);
+    assertArrayEquals(new int[] { 182, 182 }, mappedRegion);
+  
+    // 11150 in dna should _not_ map to protein
+    mappedRegion = mapList[0].getMap().locateInTo(11150, 11150);
+    assertNull(mappedRegion);
+  
+    // similarly 183 in protein should _not_ map to dna
+    mappedRegion = mapList[0].getMap().locateInFrom(183, 183);
+    assertNull(mappedRegion);
+  }
+}
diff --git a/test/jalview/io/gff/Gff3HelperTest.java b/test/jalview/io/gff/Gff3HelperTest.java
new file mode 100644 (file)
index 0000000..420b032
--- /dev/null
@@ -0,0 +1,206 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+public class Gff3HelperTest
+{
+
+  /**
+   * Test processing one PASA GFF line giving a match from forward strand to
+   * forward strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessCdnaMatch_forwardToForward() throws IOException
+  {
+    GffHelperBase testee = new Gff3Helper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
+            .split("\\t");
+    SequenceI seq = new Sequence("gi|68711",
+            "GAATTCGTTCATGTAGGTTGATTTTTATT");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+
+    /*
+     * this should create a mapping from gi|68711/12923-13060
+     * to virtual sequence gi|N37351 (added to newseqs) positions 1-138
+     */
+    testee.processGff(seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("gi|N37351", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+
+    /*
+     * 'dnaseqs' (map from) is here [gi|68711]
+     * 'aaseqs' (map to) is here [gi|N37351]
+     */
+    // TODO use more suitable naming in AlignedCodonFrame
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 1, 138 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Test processing one PASA GFF line giving a match from forward strand to
+   * reverse strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessCdnaMatch_forwardToReverse() throws IOException
+  {
+    GffHelperBase testee = new Gff3Helper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 -"
+            .split("\\t");
+    SequenceI seq = new Sequence("gi|68711",
+            "GAATTCGTTCATGTAGGTTGATTTTTATT");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+
+    /*
+     * this should create a mapping from gi|68711/12923-13060
+     * to virtual sequence gi|N37351 (added to newseqs) positions 138-1
+     */
+    testee.processGff(seq, gff, align, newseqs, false);
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("gi|N37351", newseqs.get(0).getName());
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().iterator().next();
+
+    /*
+     * 'dnaseqs' (map from) is here [gi|68711]
+     * 'aaseqs' (map to) is here [gi|N37351]
+     */
+    // TODO use more suitable naming in AlignedCodonFrame
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(1, mapping.getdnaToProt()[0].getFromRanges().size());
+    assertArrayEquals(new int[] { 12923, 13060 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    assertArrayEquals(new int[] { 138, 1 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+  /**
+   * Test processing one PASA GFF line giving a match from reverse complement
+   * strand to forward strand
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessCdnaMatch_reverseToForward() throws IOException
+  {
+    GffHelperBase testee = new Gff3Helper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t-\t.\tID=align_68;Target=gi|N37351 1 138 +"
+            .split("\\t");
+    SequenceI seq = new Sequence("gi|68711",
+            "GAATTCGTTCATGTAGGTTGATTTTTATT");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+
+    /*
+     * (For now) we don't process reverse complement mappings; to do this
+     * would require (a) creating a virtual sequence placeholder for the
+     * reverse complement (b) resolving the sequence by its id from some
+     * source (GFF ##FASTA or other) (c) creating the reverse complement
+     * sequence (d) updating the mapping to be to the reverse complement
+     */
+    SequenceFeature sf = testee.processGff(seq, gff, align, newseqs, false);
+    assertNull(sf);
+    assertTrue(newseqs.isEmpty());
+  }
+
+  /**
+   * Test processing two PASA GFF lines representing a spliced mapping
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessCdnaMatch_spliced() throws IOException
+  {
+    GffHelperBase testee = new Gff3Helper();
+    List<SequenceI> newseqs = new ArrayList<SequenceI>();
+    SequenceI seq = new Sequence("gi|68711",
+            "GAATTCGTTCATGTAGGTTGATTTTTATT");
+    seq.createDatasetSequence();
+    AlignmentI align = new Alignment(new SequenceI[] {});
+  
+    // mapping from gi|68711 12923-13060 to gi|N37351 1-138
+    String[] gff = "gi|68711\tblat-pasa\tcDNA_match\t12923\t13060\t98.55\t+\t.\tID=align_68;Target=gi|N37351 1 138 +"
+            .split("\\t");
+    testee.processGff(seq, gff, align, newseqs, false);
+    // mapping from gi|68711 13411-13550 to gi|N37351 139-278
+    gff = "gi|68711\tblat-pasa\tcDNA_match\t13411\t13550\t98.55\t+\t.\tID=align_68;Target=gi|N37351 139 278 +"
+            .split("\\t");
+    testee.processGff(seq, gff, align, newseqs, false);
+
+    assertEquals(1, newseqs.size());
+    assertTrue(newseqs.get(0) instanceof SequenceDummy);
+    assertEquals("gi|N37351", newseqs.get(0).getName());
+
+    // only 1 AlignedCodonFrame added to the alignment with both mappings!
+    // (this is important for 'align cdna to genome' to work correctly)
+    assertEquals(1, align.getCodonFrames().size());
+    AlignedCodonFrame mapping = align.getCodonFrames().get(0);
+  
+    /*
+     * 'dnaseqs' (map from) is here [gi|68711]
+     * 'aaseqs' (map to) is here [gi|N37351]
+     */
+    // TODO use more suitable naming in AlignedCodonFrame
+    assertEquals(1, mapping.getAaSeqs().length);
+    assertSame(seq.getDatasetSequence(), mapping.getdnaSeqs()[0]);
+    assertEquals(1, mapping.getdnaSeqs().length);
+    assertSame(newseqs.get(0), mapping.getAaSeqs()[0]);
+    assertEquals(1, mapping.getdnaToProt().length);
+    assertEquals(2, mapping.getdnaToProt()[0].getFromRanges().size());
+    // the two spliced dna ranges are combined in one MapList
+    assertArrayEquals(new int[] { 12923, 13060 },
+            mapping.getdnaToProt()[0]
+            .getFromRanges().get(0));
+    assertArrayEquals(new int[] { 13411, 13550 }, mapping.getdnaToProt()[0]
+            .getFromRanges().get(1));
+    assertEquals(1, mapping.getdnaToProt()[0].getToRanges().size());
+    // the two cdna ranges are merged into one contiguous region
+    assertArrayEquals(new int[] { 1, 278 }, mapping.getdnaToProt()[0]
+            .getToRanges().get(0));
+  }
+
+}
diff --git a/test/jalview/io/gff/GffHelperBaseTest.java b/test/jalview/io/gff/GffHelperBaseTest.java
new file mode 100644 (file)
index 0000000..fe8f88e
--- /dev/null
@@ -0,0 +1,168 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.testng.annotations.Test;
+
+public class GffHelperBaseTest
+{
+
+  /**
+   * Test the method that parses lines like <br>
+   * ID=2345;Name=Something,Another thing;Notes=Hello;Notes=World
+   */
+  @Test(groups = { "Functional" })
+  public void testParseNameValuePairs()
+  {
+    assertTrue(GffHelperBase.parseNameValuePairs(null, ";", ' ', ",")
+            .isEmpty());
+    assertTrue(GffHelperBase.parseNameValuePairs("", ";", ' ', ",")
+            .isEmpty());
+    assertTrue(GffHelperBase.parseNameValuePairs("hello=world", ";", ' ',
+            ",").isEmpty());
+
+    Map<String, List<String>> map = GffHelperBase.parseNameValuePairs(
+            "hello world", ";", ' ', ", ");
+    assertEquals(1, map.size());
+    assertEquals(1, map.get("hello").size());
+    assertEquals("world", map.get("hello").get(0));
+
+    map = GffHelperBase
+            .parseNameValuePairs(
+                    "Method= manual curation ;nothing; Notes=F2 S ; Notes=Metal,Shiny; Type=",
+                    ";", '=', ",");
+
+    // Type is ignored as no value was supplied
+    assertEquals(2, map.size());
+
+    assertEquals(1, map.get("Method").size());
+    assertEquals("manual curation", map.get("Method").get(0)); // trimmed
+
+    assertEquals(3, map.get("Notes").size());
+    assertEquals("F2 S", map.get("Notes").get(0));
+    assertEquals("Metal", map.get("Notes").get(1));
+    assertEquals("Shiny", map.get("Notes").get(2));
+  }
+
+  /**
+   * Test for the method that tries to trim mappings to equivalent lengths
+   */
+  @Test(groups = "Functional")
+  public void testTrimMapping()
+  {
+    int[] from = { 1, 12 };
+    int[] to = { 20, 31 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
+    assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+
+    // from too long:
+    from = new int[] { 1, 13 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[1, 12]", Arrays.toString(from)); // trimmed
+    assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+
+    // to too long:
+    to = new int[] { 20, 33 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[1, 12]", Arrays.toString(from)); // unchanged
+    assertEquals("[20, 31]", Arrays.toString(to)); // trimmed
+
+    // from reversed:
+    from = new int[] { 12, 1 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
+    assertEquals("[20, 31]", Arrays.toString(to)); // unchanged
+
+    // to reversed:
+    to = new int[] { 31, 20 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[12, 1]", Arrays.toString(from)); // unchanged
+    assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+
+    // from reversed and too long:
+    from = new int[] { 14, 1 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[14, 3]", Arrays.toString(from)); // end trimmed
+    assertEquals("[31, 20]", Arrays.toString(to)); // unchanged
+
+    // to reversed and too long:
+    to = new int[] { 31, 10 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 1));
+    assertEquals("[14, 3]", Arrays.toString(from)); // unchanged
+    assertEquals("[31, 20]", Arrays.toString(to)); // end trimmed
+
+    // cdna to peptide (matching)
+    from = new int[] { 1, 18 };
+    to = new int[] { 4, 9 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
+    assertEquals("[1, 18]", Arrays.toString(from)); // unchanged
+    assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+
+    // overlong cdna to peptide
+    from = new int[] { 1, 20 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
+    assertEquals("[1, 18]", Arrays.toString(from)); // end trimmed
+    assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+
+    // overlong cdna (reversed) to peptide
+    from = new int[] { 20, 1 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
+    assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
+    assertEquals("[4, 9]", Arrays.toString(to)); // unchanged
+
+    // overlong cdna (reversed) to peptide (reversed)
+    from = new int[] { 20, 1 };
+    to = new int[] { 9, 4 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 3, 1));
+    assertEquals("[20, 3]", Arrays.toString(from)); // end trimmed
+    assertEquals("[9, 4]", Arrays.toString(to)); // unchanged
+
+    // peptide to cdna (matching)
+    from = new int[] { 4, 9 };
+    to = new int[] { 1, 18 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
+    assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+
+    // peptide to overlong cdna
+    to = new int[] { 1, 20 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
+    assertEquals("[1, 18]", Arrays.toString(to)); // end trimmed
+
+    // peptide to overlong cdna (reversed)
+    to = new int[] { 20, 1 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[4, 9]", Arrays.toString(from)); // unchanged
+    assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+
+    // peptide (reversed) to overlong cdna (reversed)
+    from = new int[] { 9, 4 };
+    to = new int[] { 20, 1 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[9, 4]", Arrays.toString(from)); // unchanged
+    assertEquals("[20, 3]", Arrays.toString(to)); // end trimmed
+
+    // overlong peptide to word-length cdna
+    from = new int[] { 4, 10 };
+    to = new int[] { 1, 18 };
+    assertTrue(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[4, 9]", Arrays.toString(from)); // end trimmed
+    assertEquals("[1, 18]", Arrays.toString(to)); // unchanged
+
+    // overlong peptide to non-word-length cdna
+    from = new int[] { 4, 10 };
+    to = new int[] { 1, 19 };
+    assertFalse(GffHelperBase.trimMapping(from, to, 1, 3));
+    assertEquals("[4, 10]", Arrays.toString(from)); // unchanged
+    assertEquals("[1, 19]", Arrays.toString(to)); // unchanged
+
+  }
+}
diff --git a/test/jalview/io/gff/GffHelperFactoryTest.java b/test/jalview/io/gff/GffHelperFactoryTest.java
new file mode 100644 (file)
index 0000000..657b5bd
--- /dev/null
@@ -0,0 +1,72 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+
+import org.testng.annotations.Test;
+
+public class GffHelperFactoryTest
+{
+
+  @Test(groups = "Functional")
+  public void testGetHelper()
+  {
+    assertNull(GffHelperFactory.getHelper(null));
+
+    String tabRegex = "\\t";
+
+    /*
+     * column 3 = 'similarity' indicates exonerate GFF alignment data
+     */
+    String gff = "submitted\taffine:local\tsimilarity\t20\t30\t99\t+\t.\t";
+    // no attributes (column 9 data):
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof Gff2Helper);
+
+    // attributes set but unhandled featureGroup - get generic handler
+    gff = "submitted\taffine:local\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertSame(GffHelperFactory.getHelper(gff.split(tabRegex)).getClass(),
+            Gff3Helper.class);
+
+    // handled featureGroup (exonerate model) values
+    gff = "submitted\texonerate:protein2dna:local\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    gff = "submitted\tprotein2genome\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    gff = "submitted\tcoding2coding\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    gff = "submitted\tcoding2genome\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    gff = "submitted\tcdna2genome\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    gff = "submitted\tgenome2genome\tsimilarity\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    // not case-sensitive:
+    gff = "submitted\tgenome2genome\tSIMILARITY\t20\t30\t99\t+\t.\tID=$1";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof ExonerateHelper);
+
+    /*
+     * InterProScan has 'protein_match' in column 3
+     */
+    gff = "Submitted\tPANTHER\tprotein_match\t1\t1174\t0.0\t+\t.\tName=PTHR32154";
+    assertTrue(GffHelperFactory.getHelper(gff.split(tabRegex)) instanceof InterProScanHelper);
+
+    /*
+     * nothing specific - return the generic GFF3 class if Name=Value is present in col9
+     */
+    gff = "nothing\tinteresting\there\t20\t30\t99\t+\t.\tID=1";
+    GffHelperI helper = GffHelperFactory.getHelper(gff.split(tabRegex));
+    assertSame(helper.getClass(), Gff3Helper.class);
+
+    // return the generic GFF2 class if "Name Value" is present in col9
+    gff = "nothing\tinteresting\there\t20\t30\t99\t+\t.\tID 1";
+    helper = GffHelperFactory.getHelper(gff.split(tabRegex));
+    assertSame(helper.getClass(), Gff2Helper.class);
+  }
+}
diff --git a/test/jalview/io/gff/GffTests.java b/test/jalview/io/gff/GffTests.java
new file mode 100644 (file)
index 0000000..77da8fa
--- /dev/null
@@ -0,0 +1,88 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.io.FileLoader;
+import jalview.io.FormatAdapter;
+
+import java.util.List;
+
+import org.testng.annotations.Test;
+
+/**
+ * Tests of use cases that include parsing GFF (version 2 or 3) features that
+ * describe mappings between protein and cDNA. The format of the GFF varies
+ * depending on which tool generated it.
+ */
+public class GffTests
+{
+  /**
+   * Test the case where we load a protein ('query') sequence, then exonerate
+   * GFF mapping it to a DNA placeholder (SequenceDummy 'dna1'), and finally
+   * 'realise' the mapping with a real DNA sequence spanning the mapped region
+   */
+  @Test(groups = "Functional")
+  public void testResolveExonerateGff()
+  {
+    String proteinSeq = ">prot1/10-16\nYCWRSGA";
+    AlignFrame af = new FileLoader(false).LoadFileWaitTillLoaded(
+            proteinSeq, FormatAdapter.PASTE);
+
+    /*
+     * exonerate GFF output mapping residues 11-15 (CWRSG)
+     * to bases 24-10 in sequence 'dna1' (reverse strand)
+     */
+    String exonerateGff = "##gff-version 2\n"
+            + "prot1\tprotein2genome\tsimilarity\t11\t15\t99\t-\t.\talignment_id 0 ; Target dna1 ; Align 11 24 5";
+    af.loadJalviewDataFile(exonerateGff, FormatAdapter.PASTE, null, null);
+
+    /*
+     * check we have a mapping from prot1 to SequenceDummy 'dna1'
+     */
+    AlignmentI dataset = af.getViewport().getAlignment().getDataset();
+    assertEquals(1, dataset.getSequences().size());
+    assertEquals("prot1", dataset.getSequenceAt(0).getName());
+    assertEquals("YCWRSGA", dataset.getSequenceAt(0).getSequenceAsString());
+    List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
+    assertEquals(1, mappings.size());
+    AlignedCodonFrame mapping = mappings.iterator().next();
+    SequenceI mappedDna = mapping.getDnaForAaSeq(dataset.getSequenceAt(0));
+    assertTrue(mappedDna instanceof SequenceDummy);
+    assertEquals("dna1", mappedDna.getName());
+    Mapping[] mapList = mapping.getProtMappings();
+    assertEquals(1, mapList.length);
+    // 11 in protein should map to codon [24, 23, 22] in dna
+    int[] mappedRegion = mapList[0].getMap().locateInFrom(11, 11);
+    assertArrayEquals(new int[] { 24, 22 }, mappedRegion);
+    // 15 in protein should map to codon [12, 11, 10] in dna
+    mappedRegion = mapList[0].getMap().locateInFrom(15, 15);
+    assertArrayEquals(new int[] { 12, 10 }, mappedRegion);
+
+    // so far so good; TODO: programmatically add mapped sequences
+    // and verify the mappings are 'realised'
+    SequenceI dna1 = new Sequence("dna1", "AAACCCGGGTTTAAACCCGGGTTT");
+    AlignmentI al = new Alignment(new SequenceI[] { dna1 });
+    al.setDataset(null);
+
+    /*
+     * Now 'realise' the virtual mapping to the real DNA sequence;
+     * interactively this could be by a drag or fetch of the sequence data
+     * on to the alignment
+     */
+    mapping.realiseWith(dna1);
+    // verify the mapping is now from the real, not the dummy sequence
+    assertSame(dna1.getDatasetSequence(),
+            mapping.getDnaForAaSeq(dataset.getSequenceAt(0)));
+  }
+}
diff --git a/test/jalview/io/gff/InterProScanHelperTest.java b/test/jalview/io/gff/InterProScanHelperTest.java
new file mode 100644 (file)
index 0000000..2ef4c99
--- /dev/null
@@ -0,0 +1,71 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceDummy;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.testng.annotations.Test;
+
+public class InterProScanHelperTest
+{
+
+  /**
+   * Verifies the outcome of processing a single protein_match line of
+   * InterProScan GFF: one placeholder sequence is created for the match, and
+   * one mapping is added to the alignment between the annotated region of
+   * the sequence and that placeholder
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testProcessProteinMatch() throws IOException
+  {
+    String[] columns = "Submitted\tPfam\tprotein_match\t5\t30\t0\t+\t.\tName=PF12838;Target=Submitted 5 30;signature_desc=4Fe-4S dicluster domain;ID=match$17_5_30"
+            .split("\\t");
+    Map<String, List<String>> attributes = Gff3Helper
+            .parseNameValuePairs(columns[8]);
+
+    SequenceI protein = new Sequence("Prot1", "PQRASTGKEEDVMIWCHQN");
+    protein.createDatasetSequence();
+    AlignmentI alignment = new Alignment(new SequenceI[] {});
+    List<SequenceI> createdSeqs = new ArrayList<SequenceI>();
+
+    /*
+     * expect a mapping from Prot1/5-30 to positions 1-26 of the virtual
+     * sequence match$17_5_30, which is added to createdSeqs
+     */
+    new InterProScanHelper().processProteinMatch(attributes, protein,
+            columns, alignment, createdSeqs, false);
+
+    assertEquals(1, createdSeqs.size());
+    SequenceI matchSeq = createdSeqs.get(0);
+    assertTrue(matchSeq instanceof SequenceDummy);
+    assertEquals("match$17_5_30", matchSeq.getName());
+
+    List<AlignedCodonFrame> frames = alignment.getCodonFrames();
+    assertEquals(1, frames.size());
+    AlignedCodonFrame acf = frames.iterator().next();
+
+    /*
+     * the 'dna' side of the frame (map from) holds [Prot1];
+     * the 'aa' side (map to) holds [match$17_5_30]
+     */
+    // TODO use more suitable naming in AlignedCodonFrame
+    assertEquals(1, acf.getAaSeqs().length);
+    assertSame(protein.getDatasetSequence(), acf.getdnaSeqs()[0]);
+    assertEquals(1, acf.getdnaSeqs().length);
+    assertSame(matchSeq, acf.getAaSeqs()[0]);
+    assertEquals(1, acf.getdnaToProt().length);
+
+    // a single 'from' range 5-30 maps to a single 'to' range 1-26
+    List<int[]> fromRanges = acf.getdnaToProt()[0].getFromRanges();
+    assertEquals(1, fromRanges.size());
+    assertArrayEquals(new int[] { 5, 30 }, fromRanges.get(0));
+    List<int[]> toRanges = acf.getdnaToProt()[0].getToRanges();
+    assertEquals(1, toRanges.size());
+    assertArrayEquals(new int[] { 1, 26 }, toRanges.get(0));
+  }
+
+}
diff --git a/test/jalview/io/gff/SequenceOntologyTest.java b/test/jalview/io/gff/SequenceOntologyTest.java
new file mode 100644 (file)
index 0000000..6c9226f
--- /dev/null
@@ -0,0 +1,94 @@
+package jalview.io.gff;
+
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Tests of Sequence Ontology lookups: is-a relationships between terms,
+ * identified either by accession (e.g. SO:0000084) or by term text (e.g.
+ * micronuclear_sequence), and the checks for protein match, nucleotide match
+ * and CDS terms.
+ */
+public class SequenceOntologyTest
+{
+  private SequenceOntology so;
+
+  /**
+   * Obtains the ontology instance before each test and reports how long that
+   * took.
+   * <p>
+   * alwaysRun is set so that this configuration method is still executed
+   * when tests are selected by group (all tests here are in "Functional"). A
+   * configuration method that belongs to no group is otherwise left out of a
+   * group-filtered TestNG run, which would leave 'so' null and fail every
+   * test with a NullPointerException.
+   */
+  @BeforeMethod(alwaysRun = true)
+  public void setUp()
+  {
+    long now = System.currentTimeMillis();
+    so = SequenceOntology.getInstance();
+    long elapsed = System.currentTimeMillis() - now;
+    System.out.println("Load and cache of Sequence Ontology took "
+            + elapsed + "ms");
+  }
+
+  /**
+   * Tests isA for null and unrecognised arguments, for a term compared with
+   * itself, and for ancestors at increasing distance, with either argument
+   * given as an SO: accession or as term text
+   */
+  @Test(groups = "Functional")
+  public void testIsA()
+  {
+    assertFalse(so.isA(null, null));
+    assertFalse(so.isA(null, "SO:0000087"));
+    assertFalse(so.isA("SO:0000087", null));
+    assertFalse(so.isA("complete", "garbage"));
+
+    assertTrue(so.isA("SO:0000087", "SO:0000704"));
+    assertFalse(so.isA("SO:0000704", "SO:0000087"));
+    assertTrue(so.isA("SO:0000736", "SO:0000735"));
+
+    // same thing:
+    assertTrue(so.isA("micronuclear_sequence", "micronuclear_sequence"));
+    // direct parent:
+    assertTrue(so.isA("micronuclear_sequence", "organelle_sequence"));
+    // grandparent:
+    assertTrue(so.isA("micronuclear_sequence", "sequence_location"));
+    // great-grandparent:
+    assertTrue(so.isA("micronuclear_sequence", "sequence_attribute"));
+
+    // same thing by name / description:
+    assertTrue(so.isA("micronuclear_sequence", "SO:0000084"));
+    assertTrue(so.isA("SO:0000084", "micronuclear_sequence"));
+    assertTrue(so.isA("SO:0000084", "SO:0000084"));
+
+    // SO name to description:
+    assertTrue(so.isA("SO:0000084", "organelle_sequence"));
+    assertTrue(so.isA("SO:0000084", "sequence_location"));
+    assertTrue(so.isA("SO:0000084", "sequence_attribute"));
+
+    // description to SO name:
+    assertTrue(so.isA("micronuclear_sequence", "SO:0000736"));
+    assertTrue(so.isA("micronuclear_sequence", "SO:0000735"));
+    assertTrue(so.isA("micronuclear_sequence", "SO:0000400"));
+  }
+
+  /**
+   * Tests that protein_match and a more specific term are recognised as
+   * protein matches, and that the check is case-sensitive
+   */
+  @Test(groups = "Functional")
+  public void testIsProteinMatch()
+  {
+    assertTrue(so.isProteinMatch("protein_match"));
+    assertTrue(so.isProteinMatch("protein_hmm_match"));
+    assertFalse(so.isProteinMatch("Protein_match")); // case-sensitive
+  }
+
+  /**
+   * Tests that nucleotide_match and more specific terms are recognised as
+   * nucleotide matches, but the more general term 'match' is not
+   */
+  @Test(groups = "Functional")
+  public void testIsNucleotideMatch()
+  {
+    assertTrue(so.isNucleotideMatch("nucleotide_match"));
+    assertTrue(so.isNucleotideMatch("primer_match"));
+    assertTrue(so.isNucleotideMatch("cross_genome_match"));
+    assertTrue(so.isNucleotideMatch("expressed_sequence_match"));
+    assertTrue(so.isNucleotideMatch("translated_nucleotide_match"));
+    assertTrue(so.isNucleotideMatch("UST_match"));
+    assertTrue(so.isNucleotideMatch("RST_match"));
+    assertTrue(so.isNucleotideMatch("cDNA_match"));
+    assertTrue(so.isNucleotideMatch("EST_match"));
+    assertFalse(so.isNucleotideMatch("match")); // parent
+  }
+
+  /**
+   * Tests which terms are a kind of CDS; terms related to CDS by part_of or
+   * derives_from rather than is_a do not count
+   */
+  @Test(groups = "Functional")
+  public void testIsCDS()
+  {
+    assertTrue(so.isA("CDS", "CDS"));
+    assertTrue(so.isA("CDS_predicted", "CDS"));
+    assertTrue(so.isA("transposable_element_CDS", "CDS"));
+    assertTrue(so.isA("edited_CDS", "CDS"));
+    assertTrue(so.isA("CDS_independently_known", "CDS"));
+    assertTrue(so.isA("CDS_fragment", "CDS"));
+    assertFalse(so.isA("CDS_region", "CDS"));// part_of
+    assertFalse(so.isA("polypeptide", "CDS")); // derives_from
+  }
+}
index e9fa336..ddee3ac 100644 (file)
@@ -29,8 +29,8 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.FormatAdapter;
 
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
@@ -54,21 +54,21 @@ public class StructureSelectionManagerTest
     AlignedCodonFrame acf2 = new AlignedCodonFrame();
 
     ssm.registerMapping(acf1);
-    assertEquals(1, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
+    assertEquals(1, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
 
     ssm.registerMapping(acf2);
-    assertEquals(2, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
-    assertTrue(ssm.seqmappings.contains(acf2));
+    assertEquals(2, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
+    assertTrue(ssm.getSequenceMappings().contains(acf2));
 
     /*
      * Re-adding the first mapping does nothing
      */
     ssm.registerMapping(acf1);
-    assertEquals(2, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
-    assertTrue(ssm.seqmappings.contains(acf2));
+    assertEquals(2, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
+    assertTrue(ssm.getSequenceMappings().contains(acf2));
   }
 
   @Test(groups = { "Functional" })
@@ -78,10 +78,10 @@ public class StructureSelectionManagerTest
     AlignedCodonFrame acf2 = new AlignedCodonFrame();
     AlignedCodonFrame acf3 = new AlignedCodonFrame();
 
-    Set<AlignedCodonFrame> set1 = new HashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> set1 = new ArrayList<AlignedCodonFrame>();
     set1.add(acf1);
     set1.add(acf2);
-    Set<AlignedCodonFrame> set2 = new HashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> set2 = new ArrayList<AlignedCodonFrame>();
     set2.add(acf2);
     set2.add(acf3);
 
@@ -93,10 +93,10 @@ public class StructureSelectionManagerTest
     ssm.registerMappings(set2);
     ssm.registerMappings(set2);
 
-    assertEquals(3, ssm.seqmappings.size());
-    assertTrue(ssm.seqmappings.contains(acf1));
-    assertTrue(ssm.seqmappings.contains(acf2));
-    assertTrue(ssm.seqmappings.contains(acf3));
+    assertEquals(3, ssm.getSequenceMappings().size());
+    assertTrue(ssm.getSequenceMappings().contains(acf1));
+    assertTrue(ssm.getSequenceMappings().contains(acf2));
+    assertTrue(ssm.getSequenceMappings().contains(acf3));
   }
 
   /**
index 7a6dc35..e1eb2a6 100644 (file)
@@ -96,6 +96,7 @@ public class DBRefUtilsTest
     assertEquals("UNIPROT", DBRefUtils.getCanonicalName("UNIPROTKB/TREMBL"));
     assertEquals("UNIPROTKB/SWISS-CHEESE",
             DBRefUtils.getCanonicalName("UNIPROTKB/SWISS-CHEESE"));
+    assertEquals("ENSEMBL", DBRefUtils.getCanonicalName("Ensembl"));
   }
 
   @Test(groups = { "Functional" })
index de3994e..2520de0 100644 (file)
@@ -23,7 +23,9 @@ package jalview.util;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -450,6 +452,36 @@ public class MapListTest
     assertEquals("{[2, 3], [5, 7], [9, 10], [12, 12], [14, 14], [16, 18]}",
             prettyPrint(ml2.getFromRanges()));
     assertEquals("{[1, 1], [3, 4], [6, 6]}", prettyPrint(ml2.getToRanges()));
+
+    /*
+     * reverse direction
+     */
+    codons = new int[] { 9, 6 };
+    protein = new int[] { 100, 91, 80, 79 };
+    ml = new MapList(codons, protein, 3, 1);
+    assertEquals(6, ml.getFromLowest());
+    assertEquals(9, ml.getFromHighest());
+    assertEquals(79, ml.getToLowest());
+    assertEquals(100, ml.getToHighest());
+  }
+
+  /**
+   * Test constructor can merge consecutive ranges
+   */
+  @Test(groups = { "Functional" })
+  public void testConstructor_mergeRanges()
+  {
+    int[] codons = { 2, 3, 3, 7, 9, 10, 12, 12, 14, 14, 16, 17 };
+    int[] protein = { 1, 1, 1, 3, 6, 6 };
+    MapList ml = new MapList(codons, protein, 3, 1);
+    assertEquals(3, ml.getFromRatio());
+    assertEquals(2, ml.getFromLowest());
+    assertEquals(17, ml.getFromHighest());
+    assertEquals(1, ml.getToLowest());
+    assertEquals(6, ml.getToHighest());
+    assertEquals("{[2, 7], [9, 10], [12, 12], [14, 14], [16, 17]}",
+            prettyPrint(ml.getFromRanges()));
+    assertEquals("{[1, 3], [6, 6]}", prettyPrint(ml.getToRanges()));
   }
 
   /**
@@ -503,7 +535,192 @@ public class MapListTest
     MapList ml = new MapList(new int[] { 1, 5, 10, 15, 25, 20 }, new int[] {
         51, 1 }, 1, 3);
     String s = ml.toString();
-    assertEquals("From (1:3) [ [1, 5] [10, 15] [25, 20] ] To [ [51, 1] ]",
+    assertEquals("[ [1, 5] [10, 15] [25, 20] ] To [ [51, 1] ]",
             s);
   }
+
+  @Test(groups = { "Functional" })
+  public void testAddMapList()
+  {
+    MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 },
+            new int[] { 72, 22 }, 1, 3);
+    assertEquals(11, ml.getFromLowest());
+    assertEquals(35, ml.getFromHighest());
+    assertEquals(22, ml.getToLowest());
+    assertEquals(72, ml.getToHighest());
+
+    MapList ml2 = new MapList(new int[] { 2, 4, 37, 40 }, new int[] { 12,
+        17, 78, 83, 88, 96 }, 1, 3);
+    ml.addMapList(ml2);
+    assertEquals(2, ml.getFromLowest());
+    assertEquals(40, ml.getFromHighest());
+    assertEquals(12, ml.getToLowest());
+    assertEquals(96, ml.getToHighest());
+
+    String s = ml.toString();
+    assertEquals(
+            "[ [11, 15] [20, 25] [35, 30] [2, 4] [37, 40] ] To [ [72, 22] [12, 17] [78, 83] [88, 96] ]",
+            s);
+  }
+
+  @Test(groups = { "Functional" })
+  public void testAddMapList_contiguous()
+  {
+    MapList ml = new MapList(new int[] { 11, 15 }, new int[] { 72, 58 }, 1,
+            3);
+
+    MapList ml2 = new MapList(new int[] { 15, 16 }, new int[] { 58, 53 },
+            1, 3);
+    ml.addMapList(ml2);
+    assertEquals("[ [11, 16] ] To [ [72, 53] ]", ml.toString());
+  }
+
+  @Test(groups = "Functional")
+  public void testAddRange()
+  {
+    int[] range = { 1, 5 };
+    List<int[]> ranges = new ArrayList<int[]>();
+
+    // add to empty list:
+    MapList.addRange(range, ranges);
+    assertEquals(1, ranges.size());
+    assertSame(range, ranges.get(0));
+
+    // extend contiguous (same position):
+    MapList.addRange(new int[] { 5, 10 }, ranges);
+    assertEquals(1, ranges.size());
+    assertEquals(1, ranges.get(0)[0]);
+    assertEquals(10, ranges.get(0)[1]);
+
+    // extend contiguous (next position):
+    MapList.addRange(new int[] { 11, 15 }, ranges);
+    assertEquals(1, ranges.size());
+    assertEquals(1, ranges.get(0)[0]);
+    assertEquals(15, ranges.get(0)[1]);
+
+    // change direction: range is not merged:
+    MapList.addRange(new int[] { 16, 10 }, ranges);
+    assertEquals(2, ranges.size());
+    assertEquals(16, ranges.get(1)[0]);
+    assertEquals(10, ranges.get(1)[1]);
+
+    // extend reverse contiguous (same position):
+    MapList.addRange(new int[] { 10, 8 }, ranges);
+    assertEquals(2, ranges.size());
+    assertEquals(16, ranges.get(1)[0]);
+    assertEquals(8, ranges.get(1)[1]);
+
+    // extend reverse contiguous (next position):
+    MapList.addRange(new int[] { 7, 6 }, ranges);
+    assertEquals(2, ranges.size());
+    assertEquals(16, ranges.get(1)[0]);
+    assertEquals(6, ranges.get(1)[1]);
+
+    // change direction: range is not merged:
+    MapList.addRange(new int[] { 6, 9 }, ranges);
+    assertEquals(3, ranges.size());
+    assertEquals(6, ranges.get(2)[0]);
+    assertEquals(9, ranges.get(2)[1]);
+
+    // not contiguous: not merged
+    MapList.addRange(new int[] { 11, 12 }, ranges);
+    assertEquals(4, ranges.size());
+    assertEquals(11, ranges.get(3)[0]);
+    assertEquals(12, ranges.get(3)[1]);
+  }
+
+  /**
+   * Check state after construction
+   */
+  @Test(groups = { "Functional" })
+  public void testConstructor_withLists()
+  {
+    /*
+     * reverse direction
+     */
+    int[][] codons = new int[][] { { 9, 6 } };
+    int[][] protein = new int[][] { { 100, 91 }, { 80, 79 } };
+    MapList ml = new MapList(Arrays.asList(codons), Arrays.asList(protein),
+            3, 1);
+    assertEquals(6, ml.getFromLowest());
+    assertEquals(9, ml.getFromHighest());
+    assertEquals(79, ml.getToLowest());
+    assertEquals(100, ml.getToHighest());
+  }
+
+  /**
+   * Test the method that reports whether the (first) 'from' range is forward
+   * or reverse. Single position ranges are ignored.
+   */
+  @Test(groups = { "Functional" })
+  public void testIsFromForwardStrand()
+  {
+    MapList ml = new MapList(new int[] { 2, 2, 3, 9, 12, 11 },
+            new int[] { 20, 11 }, 1, 1);
+    assertTrue(ml.isFromForwardStrand());
+
+    ml = new MapList(new int[] { 2, 2, 11, 5, 13, 14 },
+            new int[] { 20, 11 }, 1, 1);
+    assertFalse(ml.isFromForwardStrand());
+
+    ml = new MapList(new int[] { 2, 2, 4, 4, 6, 6 }, new int[] { 3, 1 }, 1,
+            1);
+    assertTrue(ml.isFromForwardStrand());
+  }
+
+  /**
+   * Test the method that merges a list of ranges where possible
+   */
+  @Test(groups = { "Functional" })
+  public void testCoalesceRanges()
+  {
+    assertNull(MapList.coalesceRanges(null));
+    List<int[]> ranges = new ArrayList<int[]>();
+    assertSame(ranges, MapList.coalesceRanges(ranges));
+    ranges.add(new int[] { 1, 3 });
+    assertSame(ranges, MapList.coalesceRanges(ranges));
+
+    // add non-contiguous range:
+    ranges.add(new int[] { 5, 6 });
+    assertSame(ranges, MapList.coalesceRanges(ranges));
+
+    // 'contiguous' range in opposite direction is not merged:
+    ranges.add(new int[] { 7, 6 });
+    assertSame(ranges, MapList.coalesceRanges(ranges));
+
+    // merging in forward direction:
+    ranges.clear();
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 4, 5 });
+    ranges.add(new int[] { 5, 5 });
+    ranges.add(new int[] { 5, 7 });
+    List<int[]> merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 1, 7 }, merged.get(0));
+
+    // merging in reverse direction:
+    ranges.clear();
+    ranges.add(new int[] { 7, 5 });
+    ranges.add(new int[] { 5, 4 });
+    ranges.add(new int[] { 4, 4 });
+    ranges.add(new int[] { 3, 1 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(1, merged.size());
+    assertArrayEquals(new int[] { 7, 1 }, merged.get(0));
+
+    // merging with switches of direction:
+    ranges.clear();
+    ranges.add(new int[] { 1, 3 });
+    ranges.add(new int[] { 4, 5 });
+    ranges.add(new int[] { 5, 5 });
+    ranges.add(new int[] { 6, 6 });
+    ranges.add(new int[] { 12, 10 });
+    ranges.add(new int[] { 9, 8 });
+    ranges.add(new int[] { 8, 8 });
+    ranges.add(new int[] { 7, 7 });
+    merged = MapList.coalesceRanges(ranges);
+    assertEquals(2, merged.size());
+    assertArrayEquals(new int[] { 1, 6 }, merged.get(0));
+    assertArrayEquals(new int[] { 12, 7 }, merged.get(1));
+  }
 }
index 51c99af..7100381 100644 (file)
@@ -43,12 +43,9 @@ import jalview.io.FormatAdapter;
 
 import java.awt.Color;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.testng.annotations.Test;
 
@@ -77,7 +74,8 @@ public class MappingUtilsTest
     MapList map = new MapList(new int[] { 5, 10 }, new int[] { 12, 13 }, 3,
             1);
     acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     /*
      * Check protein residue 12 maps to codon 5-7, 13 to codon 8-10
@@ -129,7 +127,8 @@ public class MappingUtilsTest
     MapList map = new MapList(new int[] { 6, 6, 8, 9, 11, 11, 13, 13, 15,
         15 }, new int[] { 8, 9 }, 3, 1);
     acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     /*
      * Check protein residue 8 maps to [6, 8, 9]
@@ -209,7 +208,8 @@ public class MappingUtilsTest
       acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
               .getSequenceAt(seq).getDatasetSequence(), map);
     }
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     AlignViewportI dnaView = new AlignViewport(cdna);
     AlignViewportI proteinView = new AlignViewport(protein);
@@ -343,8 +343,10 @@ public class MappingUtilsTest
   protected void setupMappedAlignments() throws IOException
   {
     /*
-     * Set up dna and protein Seq1/2/3 with mappings (held on the protein
-     * viewport). Lower case for introns.
+     * Map (upper-case = coding):
+     * Seq1/10-18 AC-GctGtC-T to Seq1/40 -K-P
+     * Seq2/20-27 Tc-GA-G-T-Tc to Seq2/20-27 L--Q
+     * Seq3/30-38 TtTT-AaCGg- to Seq3/60-61 G--S
      */
     AlignmentI cdna = loadAlignment(">Seq1/10-18\nAC-GctGtC-T\n"
             + ">Seq2/20-27\nTc-GA-G-T-Tc\n" + ">Seq3/30-38\nTtTT-AaCGg-\n",
@@ -373,7 +375,8 @@ public class MappingUtilsTest
         61 }, 3, 1);
     acf.addMap(cdna.getSequenceAt(2).getDatasetSequence(), protein
             .getSequenceAt(2).getDatasetSequence(), map);
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     dnaView = new AlignViewport(cdna);
     proteinView = new AlignViewport(protein);
@@ -442,7 +445,7 @@ public class MappingUtilsTest
             "[1, 2, 3, 4, 7, 8, 9, 12]",
             Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4, 7,
                 9, 12, 12 })));
-    // unpaired start position is ignored:
+    // trailing unpaired start position is ignored:
     assertEquals(
             "[1, 2, 3, 4, 7, 8, 9, 12]",
             Arrays.toString(MappingUtils.flattenRanges(new int[] { 1, 4, 7,
@@ -474,7 +477,8 @@ public class MappingUtilsTest
       acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
               .getSequenceAt(seq).getDatasetSequence(), map);
     }
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     AlignViewportI dnaView = new AlignViewport(cdna);
     AlignViewportI proteinView = new AlignViewport(protein);
@@ -556,7 +560,8 @@ public class MappingUtilsTest
       acf.addMap(cdna.getSequenceAt(seq).getDatasetSequence(), protein
               .getSequenceAt(seq).getDatasetSequence(), map);
     }
-    Set<AlignedCodonFrame> acfList = Collections.singleton(acf);
+    List<AlignedCodonFrame> acfList = Arrays.asList(new AlignedCodonFrame[]
+    { acf });
 
     AlignViewportI dnaView = new AlignViewport(cdna);
     AlignViewportI proteinView = new AlignViewport(protein);
@@ -651,7 +656,7 @@ public class MappingUtilsTest
     AlignedCodonFrame acf3 = new AlignedCodonFrame();
     acf3.addMap(seq3.getDatasetSequence(), seq1.getDatasetSequence(), map);
 
-    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     mappings.add(acf1);
     mappings.add(acf2);
     mappings.add(acf3);
@@ -707,7 +712,7 @@ public class MappingUtilsTest
     AlignedCodonFrame acf = new AlignedCodonFrame();
     MapList map = new MapList(new int[] { 8, 16 }, new int[] { 5, 7 }, 3, 1);
     acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map);
-    Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+    List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     mappings.add(acf);
 
     AlignmentI prot = new Alignment(new SequenceI[] { protein });
@@ -738,6 +743,45 @@ public class MappingUtilsTest
   }
 
   /**
+   * Tests for the method that converts a series of [start, end] ranges to
+   * single positions, where the mapping is to a reverse strand, i.e. the start
+   * of a range is greater than its end
+   */
+  @Test(groups = { "Functional" })
+  public void testFlattenRanges_reverseStrand()
+  {
+    assertEquals("[4, 3, 2, 1]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 1 })));
+    assertEquals(
+            "[4, 3, 2, 1]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 3, 2,
+                1 })));
+    assertEquals(
+            "[4, 3, 2, 1]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 4, 3,
+                3, 2, 2, 1, 1 })));
+    assertEquals(
+            "[12, 9, 8, 7, 4, 3, 2, 1]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12,
+                9, 7, 4, 1 })));
+    // forwards and backwards anyone?
+    assertEquals(
+            "[4, 5, 6, 3, 2, 1]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 6, 3,
+                1 })));
+    // backwards and forwards
+    assertEquals(
+            "[3, 2, 1, 4, 5, 6]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 3, 1, 4,
+                6 })));
+    // trailing unpaired start position is ignored:
+    assertEquals(
+            "[12, 9, 8, 7, 4, 3, 2]",
+            Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12,
+                9, 7, 4, 2, 1 })));
+  }
+
+  /**
    * Test mapping a column selection including hidden columns
    * 
    * @throws IOException
@@ -747,62 +791,66 @@ public class MappingUtilsTest
   {
     setupMappedAlignments();
   
-    ColumnSelection colsel = new ColumnSelection();
+    ColumnSelection proteinSelection = new ColumnSelection();
 
     /*
      * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3
      * in dna respectively, overall 0-4
      */
-    colsel.hideColumns(0);
-    ColumnSelection cs = MappingUtils.mapColumnSelection(colsel,
+    proteinSelection.hideColumns(0);
+    ColumnSelection dnaSelection = MappingUtils.mapColumnSelection(proteinSelection,
             proteinView, dnaView);
-    assertEquals("[]", cs.getSelected().toString());
-    List<int[]> hidden = cs.getHiddenColumns();
+    assertEquals("[]", dnaSelection.getSelected().toString());
+    List<int[]> hidden = dnaSelection.getHiddenColumns();
     assertEquals(1, hidden.size());
     assertEquals("[0, 4]", Arrays.toString(hidden.get(0)));
 
     /*
      * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna
      */
-    colsel.revealAllHiddenColumns();
-    colsel.hideColumns(1);
-    cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
-    hidden = cs.getHiddenColumns();
+    proteinSelection.revealAllHiddenColumns();
+    // the unhidden columns are now marked selected!
+    assertEquals("[0]", proteinSelection.getSelected().toString());
+    // deselect these or hideColumns will be expanded to include 0
+    proteinSelection.clear();
+    proteinSelection.hideColumns(1);
+    dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
+    hidden = dnaSelection.getHiddenColumns();
     assertEquals(1, hidden.size());
     assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
 
     /*
      * Column 2 in protein picks up gaps only - no mapping
      */
-    colsel.revealAllHiddenColumns();
-    colsel.clear();
-    colsel.hideColumns(2);
-    cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
-    assertTrue(cs.getHiddenColumns().isEmpty());
+    proteinSelection.revealAllHiddenColumns();
+    proteinSelection.clear();
+    proteinSelection.hideColumns(2);
+    dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
+    assertTrue(dnaSelection.getHiddenColumns().isEmpty());
 
     /*
      * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns
      * 6-9, 6-10, 5-8 respectively, overall to 5-10
      */
-    colsel.revealAllHiddenColumns();
-    colsel.clear();
-    colsel.hideColumns(3); // 5-10 hidden in dna
-    colsel.addElement(1); // 0-3 selected in dna
-    cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
-    assertEquals("[0, 1, 2, 3]", cs.getSelected().toString());
-    hidden = cs.getHiddenColumns();
+    proteinSelection.revealAllHiddenColumns();
+    proteinSelection.clear();
+    proteinSelection.hideColumns(3); // 5-10 hidden in dna
+    proteinSelection.addElement(1); // 0-3 selected in dna
+    dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
+    assertEquals("[0, 1, 2, 3]", dnaSelection.getSelected().toString());
+    hidden = dnaSelection.getHiddenColumns();
     assertEquals(1, hidden.size());
     assertEquals("[5, 10]", Arrays.toString(hidden.get(0)));
 
     /*
      * Combine hiding columns 1 and 3 to get discontiguous hidden columns
      */
-    colsel.revealAllHiddenColumns();
-    colsel.clear();
-    colsel.hideColumns(1);
-    colsel.hideColumns(3);
-    cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView);
-    hidden = cs.getHiddenColumns();
+    proteinSelection.revealAllHiddenColumns();
+    proteinSelection.clear();
+    proteinSelection.hideColumns(1);
+    proteinSelection.hideColumns(3);
+    dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView);
+    hidden = dnaSelection.getHiddenColumns();
     assertEquals(2, hidden.size());
     assertEquals("[0, 3]", Arrays.toString(hidden.get(0)));
     assertEquals("[5, 10]", Arrays.toString(hidden.get(1)));
index 2342afe..dc2555b 100644 (file)
@@ -24,7 +24,9 @@ import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertTrue;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 import org.testng.annotations.Test;
 
@@ -128,4 +130,39 @@ public class StringUtilsTest
     assertEquals("a*b*c*cde", StringUtils.arrayToSeparatorList(new String[]
     { "a", "b*c", "cde" }, "*"));
   }
+
+  /**
+   * Checks joining of a null, empty, single and multi-element list
+   */
+  @Test(groups = { "Functional" })
+  public void testListToDelimitedString()
+  {
+    assertEquals("", StringUtils.listToDelimitedString(null, ";"));
+    List<String> words = new ArrayList<String>();
+    assertEquals("", StringUtils.listToDelimitedString(words, ";"));
+    words.add("now");
+    assertEquals("now", StringUtils.listToDelimitedString(words, ";"));
+    words.add("is");
+    assertEquals("now;is", StringUtils.listToDelimitedString(words, ";"));
+    // a multi-character delimiter is expected to be inserted verbatim
+    assertEquals("now ; is", StringUtils.listToDelimitedString(words, " ; "));
+    words.addAll(Arrays.asList("the", "winter", "of", "our", "discontent"));
+    String sentence = StringUtils.listToDelimitedString(words, " ");
+    assertEquals("now is the winter of our discontent", sentence);
+  }
+
+  /** Valid ints are parsed; null, non-numeric and overflowing input give 0 */
+  @Test(groups = { "Functional" })
+  public void testParseInt()
+  {
+    for (String unparseable : new String[] { null, "", "x", "1.2" })
+    {
+      assertEquals(0, StringUtils.parseInt(unparseable));
+    }
+    assertEquals(33, StringUtils.parseInt("33"));
+    assertEquals(33, StringUtils.parseInt("+33"));
+    assertEquals(-123, StringUtils.parseInt("-123"));
+    String tooBigForInt = String.valueOf(Integer.MAX_VALUE) + "1";
+    assertEquals(0, StringUtils.parseInt(tooBigForInt));
+  }
 }
diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java
new file mode 100644 (file)
index 0000000..7a9b553
--- /dev/null
@@ -0,0 +1,232 @@
+package jalview.ws;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.ws.seqfetcher.ASequenceFetcher;
+import jalview.ws.seqfetcher.DbSourceProxy;
+
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * Manual harness that queries sequence database sources and prints what was
+ * retrieved, together with any cross-referenced products it can find.
+ */
+public class SequenceFetcherTest
+{
+
+  /**
+   * Simple run method to test dbsources. Results and errors are printed to
+   * stdout/stderr rather than asserted.
+   *
+   * @param argv
+   *          optional [-nodas] [DBNAME [ACCNO]] - see the usage text below
+   */
+  public static void main(String[] argv)
+  {
+    // TODO: extracted from SequenceFetcher - convert to proper unit test with
+    // assertions
+
+    AlignmentI ds = null;
+    Vector<Object[]> noProds = new Vector<Object[]>();
+    String usage = "SequenceFetcher.main [-nodas] [<DBNAME> [<ACCNO>]]\n"
+            + "With no arguments, all DbSources will be queried with their test Accession number.\n"
+            + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n"
+            + "If given two arguments, SequenceFetcher will try to find the DbFetcher corresponding to <DBNAME> and retrieve <ACCNO> from it.\n"
+            + "The -nodas option will exclude DAS sources from the database fetchers Jalview will try to use.";
+    boolean withDas = true;
+    if (argv != null && argv.length > 0
+            && argv[0].toLowerCase().startsWith("-nodas"))
+    {
+      withDas = false;
+      String targs[] = new String[argv.length - 1];
+      System.arraycopy(argv, 1, targs, 0, targs.length);
+      argv = targs;
+    }
+    if (argv != null && argv.length > 0)
+    {
+      List<DbSourceProxy> sps = new SequenceFetcher(withDas)
+              .getSourceProxy(argv[0]);
+      if (sps != null)
+      {
+        for (DbSourceProxy sp : sps)
+        {
+          AlignmentI al = null;
+          try
+          {
+            al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp
+                    .getTestQuery());
+          } catch (Exception e)
+          {
+            e.printStackTrace();
+            System.err.println("Error when retrieving "
+                    + (argv.length > 1 ? argv[1] : sp.getTestQuery())
+                    + " from " + argv[0] + "\nUsage: " + usage);
+          }
+          // al is still null if retrieval failed: dereference it only here
+          if (al != null)
+          {
+            SequenceI[] prod = al.getSequencesArray();
+            for (int p = 0; p < prod.length; p++)
+            {
+              System.out.println("Prod " + p + ": "
+                      + prod[p].getDisplayId(true) + " : "
+                      + prod[p].getDescription());
+            }
+          }
+        }
+        return;
+      }
+      else
+      {
+        // getSupportedDb() returns an array, so list its contents explicitly
+        System.err.println("Can't resolve " + argv[0]
+                + " as a database name. Allowed values are :\n"
+                + java.util.Arrays.toString(new SequenceFetcher()
+                        .getSupportedDb()));
+      }
+      System.out.println(usage);
+      return;
+    }
+    ASequenceFetcher sfetcher = new SequenceFetcher(withDas);
+    String[] dbSources = sfetcher.getSupportedDb();
+    for (int dbsource = 0; dbsource < dbSources.length; dbsource++)
+    {
+      String db = dbSources[dbsource];
+      // skip me
+      if (db.equals(DBRefSource.PDB))
+      {
+        continue;
+      }
+      for (DbSourceProxy sp : sfetcher.getSourceProxy(db))
+      {
+        System.out.println("Source: " + sp.getDbName() + " (" + db
+                + "): retrieving test:" + sp.getTestQuery());
+        AlignmentI al = null;
+        try
+        {
+          al = sp.getSequenceRecords(sp.getTestQuery());
+          if (al != null && al.getHeight() > 0)
+          {
+            boolean dna = sp.isDnaCoding();
+            // try and find products
+            String types[] = jalview.analysis.CrossRef
+                    .findSequenceXrefTypes(dna, al.getSequencesArray());
+            if (types != null)
+            {
+              System.out.println("Xref Types for: "
+                      + (dna ? "dna" : "prot"));
+              for (int t = 0; t < types.length; t++)
+              {
+                System.out.println("Type: " + types[t]);
+                // check for a null result before asking for its sequences
+                AlignmentI xrefs = jalview.analysis.CrossRef
+                        .findXrefSequences(al.getSequencesArray(), dna,
+                                types[t]);
+                SequenceI[] prod = (xrefs == null) ? null : xrefs
+                        .getSequencesArray();
+                System.out.println("Found "
+                        + ((prod == null) ? "no" : "" + prod.length)
+                        + " products");
+                if (prod != null)
+                {
+                  for (int p = 0; p < prod.length; p++)
+                  {
+                    System.out.println("Prod " + p + ": "
+                            + prod[p].getDisplayId(true));
+                  }
+                }
+              }
+            }
+            else
+            {
+              noProds.addElement((dna ? new Object[] { al, al }
+                      : new Object[] { al }));
+            }
+          }
+        } catch (Exception ex)
+        {
+          System.out.println("ERROR:Failed to retrieve test query.");
+          ex.printStackTrace(System.out);
+        }
+        if (al == null)
+        {
+          System.out.println("ERROR:No alignment retrieved.");
+          StringBuffer raw = sp.getRawRecords();
+          if (raw != null)
+          {
+            System.out.println(raw.toString());
+          }
+          else
+          {
+            System.out.println("ERROR:No Raw results.");
+          }
+        }
+        else
+        {
+          System.out.println("Retrieved " + al.getHeight() + " sequences.");
+          for (int s = 0; s < al.getHeight(); s++)
+          {
+            SequenceI sq = al.getSequenceAt(s);
+            // walk up to the underlying dataset sequence
+            while (sq.getDatasetSequence() != null)
+            {
+              sq = sq.getDatasetSequence();
+            }
+            if (ds == null)
+            {
+              ds = new Alignment(new SequenceI[] { sq });
+            }
+            else
+            {
+              ds.addSequence(sq);
+            }
+          }
+        }
+        System.out.flush();
+        System.err.flush();
+      }
+      // NOTE(review): this block sits inside the dbsource loop, so entries
+      // queued for earlier sources are searched again on each iteration -
+      // confirm whether it was meant to run once after the loop.
+      if (noProds.size() > 0)
+      {
+        Enumeration<Object[]> ts = noProds.elements();
+        while (ts.hasMoreElements())
+        {
+          Object[] typeSq = ts.nextElement();
+          // a two element entry marks a dna source (see addElement above)
+          boolean dna = (typeSq.length > 1);
+          AlignmentI al = (AlignmentI) typeSq[0];
+          System.out.println("Trying getProducts for "
+                  + al.getSequenceAt(0).getDisplayId(true));
+          System.out.println("Search DS Xref for: "
+                  + (dna ? "dna" : "prot"));
+          // have a bash at finding the products amongst all the retrieved
+          // sequences.
+          SequenceI[] seqs = al.getSequencesArray();
+          Alignment prodal = jalview.analysis.CrossRef.findXrefSequences(
+                  seqs, dna, null, ds);
+          System.out.println("Found "
+                  + ((prodal == null) ? "no" : "" + prodal.getHeight())
+                  + " products");
+          if (prodal != null)
+          {
+            // note should test rather than throw away codon mapping (if
+            // present)
+            SequenceI[] prod = prodal.getSequencesArray();
+            for (int p = 0; p < prod.length; p++)
+            {
+              System.out.println("Prod " + p + ": "
+                      + prod[p].getDisplayId(true));
+            }
+          }
+        }
+      }
+    }
+  }
+
+}
index 5426fce..46feebc 100644 (file)
  */
 package jalview.ws.jabaws;
 
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
 import jalview.ws.jws2.Jws2Discoverer;
 
 import java.util.Vector;
 
+import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -54,7 +52,7 @@ public class JalviewJabawsTestUtils
   @Test(groups = { "Functional" }, enabled = false)
   public void testAnnotExport()
   {
-    fail("Not yet implemented");
+    Assert.fail("Not yet implemented");
   }
 
   public static jalview.ws.jws2.Jws2Discoverer getJabawsDiscoverer()
@@ -85,10 +83,11 @@ public class JalviewJabawsTestUtils
     } catch (Exception e)
     {
       e.printStackTrace();
-      fail("Aborting. Problem discovering services. Tried " + svcurls);
+      Assert.fail("Aborting. Problem discovering services. Tried "
+              + svcurls);
     }
-    assertTrue("Failed to discover any services at ", disc.getServices()
-            .size() > 0);
+    Assert.assertTrue(disc.getServices().size() > 0,
+            "Failed to discover any services at ");
     return disc;
   }
 
index 902498b..fae5778 100644 (file)
@@ -33,6 +33,8 @@ import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.util.DBRefUtils;
 import jalview.ws.SequenceFetcher;
+import jalview.ws.dbsources.Pdb;
+import jalview.ws.dbsources.Uniprot;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -67,7 +69,7 @@ public class DbRefFetcherTest
 
   /**
    * Tests that standard protein database sources include Uniprot (as the first)
-   * and also PDB. (Additional sources are dependent on available of DAS
+   * and also PDB. (Additional sources are dependent on availability of DAS
    * services.)
    */
   @Test(groups = { "Functional" })
@@ -75,30 +77,31 @@ public class DbRefFetcherTest
   {
     String[] defdb = DBRefSource.PROTEINDBS;
     List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
+    SequenceFetcher sfetcher = new SequenceFetcher();
+    boolean pdbFound = false;
+
     for (String ddb : defdb)
     {
-      SequenceFetcher sfetcher = new SequenceFetcher();
       List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb);
 
       if (srcesfordb != null)
       {
+        // TODO is this right? get duplicate entries
         srces.addAll(srcesfordb);
       }
     }
 
     int i = 0;
     int uniprotPos = -1;
-    int pdbPos = -1;
-    // append the selected sequence sources to the default dbs
     for (DbSourceProxy s : srces)
     {
-      if (s instanceof jalview.ws.dbsources.Uniprot)
+      if (s instanceof Uniprot && uniprotPos == -1)
       {
         uniprotPos = i;
       }
-      if (s instanceof jalview.ws.dbsources.Pdb)
+      if (s instanceof Pdb)
       {
-        pdbPos = i;
+        pdbFound = true;
       }
       i++;
     }
@@ -107,7 +110,7 @@ public class DbRefFetcherTest
             + srces.size() + " sources (source was at position "
             + uniprotPos + ")", uniprotPos == 0);
     assertTrue("Failed to find PDB source amongst " + srces.size()
-            + " sources", pdbPos >= 0);
+            + " sources", pdbFound);
   }
 
   /**
@@ -167,7 +170,8 @@ public class DbRefFetcherTest
                     sfs[0].getType()));
     assertEquals(embl.getDbSource(), sfs[0].getFeatureGroup());
     DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRefs(),
-            DBRefSource.PROTEINSEQ);
+            new String[] { DBRefSource.UNIPROT, DBRefSource.UNIPROTKB,
+                DBRefSource.EMBLCDSProduct, DBRefSource.ENSEMBL });
     assertNotNull(dr);
     assertEquals("Expected a single Uniprot cross reference", 1, dr.length);
     assertEquals("Expected cross reference map to be one amino acid", dr[0]
index cd9987c..83d1a98 100755 (executable)
@@ -2839,6 +2839,58 @@ and any path to a file to save to the file]]></string>
                                                        </property>
                                                </object>
                                        </method>
+                                       <method name="addElement">
+                                               <object class="com.zerog.ia.installer.actions.InstallZipfile" objectID="1000ddddfab93">
+                                                       <property name="belongsToUninstallPhase">
+                                                               <boolean>false</boolean>
+                                                       </property>
+                                                       <property name="rollbackEnabledCancel">
+                                                               <boolean>true</boolean>
+                                                       </property>
+                                                       <property name="rollbackEnabledError">
+                                                               <boolean>true</boolean>
+                                                       </property>
+                                                       <property name="ruleExpression">
+                                                               <string><![CDATA[]]></string>
+                                                       </property>
+                                                       <property name="unixPermissions">
+                                                               <string><![CDATA[664]]></string>
+                                                       </property>
+                                                       <property name="sourceName">
+                                                               <string><![CDATA[htsjdk-1.133.jar]]></string>
+                                                       </property>
+                                                       <property name="overrideUnixPermissions">
+                                                               <boolean>false</boolean>
+                                                       </property>
+                                                       <property name="sourcePath">
+                                                               <string><![CDATA[/home/cruisecontrol/jalview/lib/]]></string>
+                                                       </property>
+                                                       <property name="shouldUninstall">
+                                                               <boolean>true</boolean>
+                                                       </property>
+                                                       <property name="rollbackEnabledCancel">
+                                                               <boolean>true</boolean>
+                                                       </property>
+                                                       <property name="rollbackEnabledError">
+                                                               <boolean>true</boolean>
+                                                       </property>
+                                                       <property name="destinationName">
+                                                               <string><![CDATA[htsjdk-1.133.jar]]></string>
+                                                       </property>
+                                                       <property name="fileSize">
+                                                               <long>16046</long>
+                                                       </property>
+                                                       <property name="macBinary">
+                                                               <boolean>false</boolean>
+                                                       </property>
+                                                       <property name="targetCheckKind">
+                                                               <int>0</int>
+                                                       </property>
+                                                       <property name="ruleExpression">
+                                                               <string><![CDATA[]]></string>
+                                                       </property>
+                                               </object>
+                                       </method>
                                </object>
                        </property>
                        <property name="rulesFailedMessage">
@@ -7307,6 +7359,7 @@ and any path to a file to read from that file]]></string>
                                                                                <object refID="1f46cffffab93"/>
                                                                                <object refID="1f46efeefab93"/>
                                                                                <object refID="1936efeefab93"/>
+                                                                               <object refID="1000ddddfab93"/>
                                                                                <object refID="10936efeefab93"/>
                                                                                <object refID="11936efeefab93"/>
                                                                                <object refID="12936efeefab93"/>
@@ -7894,6 +7947,7 @@ and any path to a file to read from that file]]></string>
                                                                                                <object refID="1f46cffffab93"/>
                                                                                                <object refID="1f46efeefab93"/>
                                                                                                <object refID="1936efeefab93"/>
+                                                                                               <object refID="1000ddddfab93"/>
                                                                                                <object refID="10936efeefab93"/>
                                                                                                <object refID="11936efeefab93"/>
                                                                                                <object refID="12936efeefab93"/>