JAL-1712 fixes/tests for Castor binding and 'show flanking regions'
author gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 12 May 2015 15:35:32 +0000 (16:35 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 12 May 2015 15:35:32 +0000 (16:35 +0100)
27 files changed:
resources/embl_mapping.xml
src/jalview/analysis/AlignmentUtils.java
src/jalview/datamodel/AlignmentAnnotation.java
src/jalview/datamodel/PDBEntry.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
src/jalview/datamodel/UniprotEntry.java
src/jalview/datamodel/UniprotFile.java
src/jalview/datamodel/UniprotProteinName.java
src/jalview/datamodel/UniprotSequence.java
src/jalview/datamodel/xdb/embl/BasePosition.java
src/jalview/datamodel/xdb/embl/EmblEntry.java
src/jalview/datamodel/xdb/embl/EmblError.java
src/jalview/datamodel/xdb/embl/EmblFeature.java
src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java
src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java
src/jalview/datamodel/xdb/embl/EmblFile.java
src/jalview/datamodel/xdb/embl/EmblSequence.java
src/jalview/datamodel/xdb/embl/Qualifier.java
src/jalview/ws/dbsources/EmblXmlSource.java
src/jalview/ws/dbsources/Uniprot.java
src/jalview/ws/ebi/EBIFetchClient.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/datamodel/AlignmentAnnotationTests.java
test/jalview/datamodel/SequenceTest.java
test/jalview/datamodel/xdb/embl/EmblFileTest.java [new file with mode: 0644]
test/jalview/ws/dbsources/UniprotTest.java [new file with mode: 0644]

index 7199354..ccbde5e 100644 (file)
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.EmblEntry">
-               <map-to xml="entry"/>
                <field name="accession" type="string">
                        <bind-xml location="accession" node="attribute"/>
                </field>
+               <!--  May 2015 changed from last-updated to match xml -->
                <field name="lastUpdated" type="string">
-                       <bind-xml location="last-updated" node="attribute"/>
+                       <bind-xml location="lastUpdated" node="attribute"/>
                </field>
                <field name="version" type="string">
                        <bind-xml location="version" node="attribute"/>
                </field>
-               
                <field name="rCreated" type="string">
                        <bind-xml location="releaseCreated" node="attribute"/>
                </field>
@@ -54,7 +53,7 @@
                <field name="desc" type="string">
                        <bind-xml name="description" node="element"/>
                </field>
-               <field name="Keywords" type="string" collection="vector">
+               <field name="keywords" type="string" collection="vector">
                        <bind-xml name="keyword" node="element"/>
                </field>
                <field name="features" type="jalview.datamodel.xdb.embl.EmblFeature" collection="vector">
                        <bind-xml name="dbreference" />
                </field>
                <field name="sequence" type="jalview.datamodel.xdb.embl.EmblSequence">
-                       <bind-xml name="sequence"/> <!-- location="sequence" node="element"/ -->
+                       <bind-xml name="sequence"/>
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.EmblSequence">
-               <map-to xml="sequence"/>
                <field name="type" type="string">
                        <bind-xml name="type" node="attribute" location="type"/>
                </field>
@@ -80,7 +78,6 @@
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.EmblFeature" verify-constructable="false">
-               <map-to xml="feature"/>
                <field name="name" type="string">
                        <bind-xml name="name" node="attribute"/>
                </field>
                </field>
        </class>
        <class name="jalview.datamodel.DBRefEntry" verify-constructable="false">
-               <field name="accessionId" type="java.lang.String"><!-- set-method="setAccessionId" get-method="getAccessionId"> -->
+               <field name="accessionId" type="java.lang.String">
                        <bind-xml name="primary" node="attribute"/>
                </field>
-               <field name="source" type="java.lang.String"> <!--set-method="setSource" get-method="getSource"> -->
+               <field name="source" type="java.lang.String"> 
                        <bind-xml name="db" node="attribute"/>
                </field>
-               <field name="version" type="string"><!-- set-method="setVersion" get-method="getVersion"> -->
+               <field name="version" type="string">
                        <bind-xml name="secondary" node="attribute"/>
                </field>
        </class>
        <class  name="jalview.datamodel.xdb.embl.Qualifier" verify-constructable="false">
-               <map-to xml="Qualifier"/>
                <field name="name">
                        <bind-xml name="name" node="attribute"/>
                </field>
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.EmblFeatureLocations">
-               <map-to xml="location"/>
                <field name="locationType" type="string">
                        <bind-xml name="type" node="attribute"/>
                </field>
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.EmblFeatureLocElement">
-               <map-to xml="locationElement"/>
                <field name="type" type="string">
                        <bind-xml name="type" node="attribute"/>
                </field>
                </field>
        </class>
        <class name="jalview.datamodel.xdb.embl.BasePosition">
-               <map-to xml="basePosition"/>
                <field name="type">
                        <bind-xml name="type" node="attribute"/>
                </field>
index a4aeac7..df30234 100644 (file)
@@ -77,18 +77,22 @@ public class AlignmentUtils
     for (SequenceI s : core.getSequences())
     {
       SequenceI newSeq = s.deriveSequence();
-      if (newSeq.getStart() > maxoffset
+      final int newSeqStart = newSeq.getStart() - 1;
+      if (newSeqStart > maxoffset
               && newSeq.getDatasetSequence().getStart() < s.getStart())
       {
-        maxoffset = newSeq.getStart();
+        maxoffset = newSeqStart;
       }
       sq.add(newSeq);
     }
     if (flankSize > -1)
     {
-      maxoffset = flankSize;
+      maxoffset = Math.min(maxoffset, flankSize);
     }
-    // now add offset to create a new expanded alignment
+
+    /*
+     * now add offset left and right to create an expanded alignment
+     */
     for (SequenceI s : sq)
     {
       SequenceI ds = s;
@@ -98,8 +102,8 @@ public class AlignmentUtils
       }
       int s_end = s.findPosition(s.getStart() + s.getLength());
       // find available flanking residues for sequence
-      int ustream_ds = s.getStart() - ds.getStart(), dstream_ds = ds
-              .getEnd() - s_end;
+      int ustream_ds = s.getStart() - ds.getStart();
+      int dstream_ds = ds.getEnd() - s_end;
 
       // build new flanked sequence
 
@@ -115,27 +119,27 @@ public class AlignmentUtils
           offset = maxoffset - flankSize;
           ustream_ds = flankSize;
         }
-        if (flankSize < dstream_ds)
+        if (flankSize <= dstream_ds)
         {
-          dstream_ds = flankSize;
+          dstream_ds = flankSize - 1;
         }
       }
+      // TODO use Character.toLowerCase to avoid creating String objects?
       char[] upstream = new String(ds.getSequence(s.getStart() - 1
               - ustream_ds, s.getStart() - 1)).toLowerCase().toCharArray();
-      char[] downstream = new String(ds.getSequence(s_end - 1, s_end + 1
+      char[] downstream = new String(ds.getSequence(s_end - 1, s_end
               + dstream_ds)).toLowerCase().toCharArray();
       char[] coreseq = s.getSequence();
       char[] nseq = new char[offset + upstream.length + downstream.length
               + coreseq.length];
       char c = core.getGapCharacter();
-      // TODO could lowercase the flanking regions
+
       int p = 0;
       for (; p < offset; p++)
       {
         nseq[p] = c;
       }
-      // s.setSequence(new String(upstream).toLowerCase()+new String(coreseq) +
-      // new String(downstream).toLowerCase());
+
       System.arraycopy(upstream, 0, nseq, p, upstream.length);
       System.arraycopy(coreseq, 0, nseq, p + upstream.length,
               coreseq.length);
@@ -153,6 +157,7 @@ public class AlignmentUtils
       {
         for (AlignmentAnnotation aa : s.getAnnotation())
         {
+          aa.adjustForAlignment(); // JAL-1712 fix
           newAl.addAnnotation(aa);
         }
       }
index 1bbe81e..b608139 100755 (executable)
  */
 package jalview.datamodel;
 
-import jalview.analysis.Rna;
-import jalview.analysis.SecStrConsensus.SimpleBP;
-import jalview.analysis.WUSSParseException;
-
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -32,6 +28,10 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import jalview.analysis.Rna;
+import jalview.analysis.SecStrConsensus.SimpleBP;
+import jalview.analysis.WUSSParseException;
+
 /**
  * DOCUMENT ME!
  * 
@@ -1359,8 +1359,21 @@ public class AlignmentAnnotation
   {
     if (properties == null)
     {
-      return Collections.EMPTY_LIST;
+      return Collections.emptyList();
     }
     return properties.keySet();
   }
+
+  /**
+   * Returns the Annotation for the given sequence position (base 1) if any,
+   * else null
+   * 
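+   * @param position
+   *          the sequence position (base 1) to look up
+   * @return the Annotation mapped to that position, or null if there is none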
+   */
+  public Annotation getAnnotationForPosition(int position)
+  {
+    return sequenceMapping == null ? null : sequenceMapping.get(position);
+
+  }
 }
index 0050666..eb955d6 100755 (executable)
@@ -125,6 +125,10 @@ public class PDBEntry
     return file;
   }
 
+  public void setType(String t)
+  {
+    this.type = t;
+  }
   public void setType(PDBEntry.Type type)
   {
     this.type = type.toString();
index 9cec370..cab1ac7 100755 (executable)
@@ -20,9 +20,6 @@
  */
 package jalview.datamodel;
 
-import jalview.analysis.AlignSeq;
-import jalview.util.StringUtils;
-
 import java.util.ArrayList;
 import java.util.Enumeration;
 import java.util.List;
@@ -30,6 +27,9 @@ import java.util.Vector;
 
 import fr.orsay.lri.varna.models.rna.RNA;
 
+import jalview.analysis.AlignSeq;
+import jalview.util.StringUtils;
+
 /**
  * 
  * Implements the SequenceI interface for a char[] based sequence object.
@@ -121,7 +121,7 @@ public class Sequence implements SequenceI
               .println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
       name = "";
     }
-    // Does sequence have the /start-end signiature?
+    // Does sequence have the /start-end signature?
     if (limitrx.search(name))
     {
       name = limitrx.left();
@@ -379,7 +379,8 @@ public class Sequence implements SequenceI
    * @param id
    *          DOCUMENT ME!
    */
-  public void setPDBId(Vector id)
+  @Override
+  public void setPDBId(Vector<PDBEntry> id)
   {
     pdbIds = id;
   }
@@ -389,7 +390,8 @@ public class Sequence implements SequenceI
    * 
    * @return DOCUMENT ME!
    */
-  public Vector getPDBId()
+  @Override
+  public Vector<PDBEntry> getPDBId()
   {
     return pdbIds;
   }
@@ -947,7 +949,7 @@ public class Sequence implements SequenceI
   {
     if (this.annotation == null)
     {
-      this.annotation = new Vector();
+      this.annotation = new Vector<AlignmentAnnotation>();
     }
     if (!this.annotation.contains(annotation))
     {
index 04f3588..38ae372 100755 (executable)
@@ -134,12 +134,13 @@ public interface SequenceI
   public char[] getSequence(int start, int end);
 
   /**
-   * create a new sequence object from start to end of this sequence
+   * create a new sequence object with a subsequence of this one but sharing the
+   * same dataset sequence
    * 
    * @param start
-   *          int index for start position
+   *          int index for start position (base 0, inclusive)
    * @param end
-   *          int index for end position
+   *          int index for end position (base 0, exclusive)
    * 
    * @return SequenceI
    * @note implementations may use getSequence to get the sequence data
index 90702a2..4cf0f13 100755 (executable)
  */
 package jalview.datamodel;
 
-import java.util.*;
+import java.util.Vector;
 
+/**
+ * Data model for an entry returned from a Uniprot query
+ * 
+ * @see uniprot_mapping.xml
+ */
 public class UniprotEntry
 {
 
   UniprotSequence sequence;
 
-  Vector name;
+  Vector<String> name;
 
-  Vector accession;
+  Vector<String> accession;
 
-  Vector feature;
+  Vector<SequenceFeature> feature;
 
-  Vector dbrefs;
+  Vector<PDBEntry> dbrefs;
 
   UniprotProteinName protName;
 
-  public void setAccession(Vector items)
+  public void setAccession(Vector<String> items)
   {
     accession = items;
   }
 
-  public void setFeature(Vector items)
+  public void setFeature(Vector<SequenceFeature> items)
   {
     feature = items;
   }
 
-  public Vector getFeature()
+  public Vector<SequenceFeature> getFeature()
   {
     return feature;
   }
 
-  public Vector getAccession()
+  public Vector<String> getAccession()
   {
     return accession;
   }
@@ -67,12 +72,12 @@ public class UniprotEntry
     return protName;
   }
 
-  public void setName(Vector na)
+  public void setName(Vector<String> na)
   {
     name = na;
   }
 
-  public Vector getName()
+  public Vector<String> getName()
   {
     return name;
   }
@@ -87,12 +92,12 @@ public class UniprotEntry
     sequence = seq;
   }
 
-  public Vector getDbReference()
+  public Vector<PDBEntry> getDbReference()
   {
     return dbrefs;
   }
 
-  public void setDbReference(Vector dbref)
+  public void setDbReference(Vector<PDBEntry> dbref)
   {
     this.dbrefs = dbref;
   }
index 44506a6..f0e38d8 100755 (executable)
  */
 package jalview.datamodel;
 
-import java.util.*;
+import java.util.Vector;
 
+/**
+ * Data model of a retrieved Uniprot entry, as unmarshalled by Castor using a
+ * binding file (uniprot_mapping.xml)
+ */
 public class UniprotFile
 {
-  Vector _items;
+  Vector<UniprotEntry> _items;
 
-  public void setUniprotEntries(Vector items)
+  public void setUniprotEntries(Vector<UniprotEntry> items)
   {
     _items = items;
   }
 
-  public Vector getUniprotEntries()
+  public Vector<UniprotEntry> getUniprotEntries()
   {
     return _items;
   }
index eb353aa..0a317e6 100755 (executable)
  */
 package jalview.datamodel;
 
+import java.util.Vector;
+
+/**
+ * Data model for protein name returned from a Uniprot query
+ * 
+ * Protein names are read from the Uniprot XML element
+ * uniprot/entry/protein/recommendedName/fullName
+ * 
+ * @see uniprot_mapping.xml
+ */
 public class UniprotProteinName
 {
-  /**
-   * internal content storage
-   */
-  private java.util.Vector names;
+  private Vector<String> names;
 
-  public void setName(java.util.Vector names)
+  public void setName(Vector<String> names)
   {
     this.names = names;
   }
 
-  public java.util.Vector getName()
+  public Vector<String> getName()
   {
     return names;
   }
index 6ce751e..1150f1e 100755 (executable)
  */
 package jalview.datamodel;
 
+/**
+ * Data model for the sequence returned by a Uniprot query
+ * 
+ * @see uniprot_mapping.xml
+ */
 public class UniprotSequence
 {
+  private String _content = "";
+
   /**
-   * internal content storage
+   * Sets the content string, omitting any space characters
+   * 
+   * @param seq
    */
-  private java.lang.String _content = "";
-
   public void setContent(String seq)
   {
-    StringBuffer sb = new StringBuffer();
-    for (int i = 0; i < seq.length(); i++)
+    if (seq != null)
     {
-      if (seq.charAt(i) != ' ')
+      StringBuilder sb = new StringBuilder(seq.length());
+      for (int i = 0; i < seq.length(); i++)
       {
-        sb.append(seq.charAt(i));
+        if (seq.charAt(i) != ' ')
+        {
+          sb.append(seq.charAt(i));
+        }
       }
+      _content = sb.toString();
     }
-    _content = sb.toString();
   }
 
   public String getContent()
index 070958a..3737adc 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+/**
+ * Data model for a feature/location/locationElement/basePosition read from an
+ * EMBL query reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class BasePosition
 {
   String type;
index fc57b27..3f890ba 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Vector;
+
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.DBRefSource;
 import jalview.datamodel.FeatureProperties;
@@ -28,11 +33,14 @@ import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.Vector;
-
+/**
+ * Data model for one entry returned from an EMBL query, as unmarshalled by
+ * Castor using a binding file
+ * 
+ * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblEntry
 {
   String accession;
@@ -49,13 +57,11 @@ public class EmblEntry
 
   String lastUpdated;
 
-  Vector keywords;
-
-  Vector refs;
+  Vector<String> keywords;
 
-  Vector dbRefs;
+  Vector<DBRefEntry> dbRefs;
 
-  Vector features;
+  Vector<EmblFeature> features;
 
   EmblSequence sequence;
 
@@ -79,7 +85,7 @@ public class EmblEntry
   /**
    * @return the dbRefs
    */
-  public Vector getDbRefs()
+  public Vector<DBRefEntry> getDbRefs()
   {
     return dbRefs;
   }
@@ -88,7 +94,7 @@ public class EmblEntry
    * @param dbRefs
    *          the dbRefs to set
    */
-  public void setDbRefs(Vector dbRefs)
+  public void setDbRefs(Vector<DBRefEntry> dbRefs)
   {
     this.dbRefs = dbRefs;
   }
@@ -113,7 +119,7 @@ public class EmblEntry
   /**
    * @return the features
    */
-  public Vector getFeatures()
+  public Vector<EmblFeature> getFeatures()
   {
     return features;
   }
@@ -122,7 +128,7 @@ public class EmblEntry
    * @param features
    *          the features to set
    */
-  public void setFeatures(Vector features)
+  public void setFeatures(Vector<EmblFeature> features)
   {
     this.features = features;
   }
@@ -130,7 +136,7 @@ public class EmblEntry
   /**
    * @return the keywords
    */
-  public Vector getKeywords()
+  public Vector<String> getKeywords()
   {
     return keywords;
   }
@@ -139,7 +145,7 @@ public class EmblEntry
    * @param keywords
    *          the keywords to set
    */
-  public void setKeywords(Vector keywords)
+  public void setKeywords(Vector<String> keywords)
   {
     this.keywords = keywords;
   }
@@ -162,23 +168,6 @@ public class EmblEntry
   }
 
   /**
-   * @return the refs
-   */
-  public Vector getRefs()
-  {
-    return refs;
-  }
-
-  /**
-   * @param refs
-   *          the refs to set
-   */
-  public void setRefs(Vector refs)
-  {
-    this.refs = refs;
-  }
-
-  /**
    * @return the releaseCreated
    */
   public String getRCreated()
@@ -190,7 +179,7 @@ public class EmblEntry
    * @param releaseCreated
    *          the releaseCreated to set
    */
-  public void setRcreated(String releaseCreated)
+  public void setRCreated(String releaseCreated)
   {
     this.rCreated = releaseCreated;
   }
index e781a7e..94de28f 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+/**
+ * Data model mapped from any &lt;error&gt; elements returned from an EMBL query
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblError
 {
   String accession;
index 077788c..7f53eb3 100644 (file)
@@ -22,20 +22,27 @@ package jalview.datamodel.xdb.embl;
 
 import java.util.Vector;
 
+import jalview.datamodel.DBRefEntry;
+
+/**
+ * Data model for a &lt;feature&gt; element returned from an EMBL query reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblFeature
 {
   String name;
 
-  Vector dbRefs;
+  Vector<DBRefEntry> dbRefs;
 
-  Vector qualifiers;
+  Vector<Qualifier> qualifiers;
 
-  Vector locations;
+  Vector<EmblFeatureLocations> locations;
 
   /**
    * @return the dbRefs
    */
-  public Vector getDbRefs()
+  public Vector<DBRefEntry> getDbRefs()
   {
     return dbRefs;
   }
@@ -44,7 +51,7 @@ public class EmblFeature
    * @param dbRefs
    *          the dbRefs to set
    */
-  public void setDbRefs(Vector dbRefs)
+  public void setDbRefs(Vector<DBRefEntry> dbRefs)
   {
     this.dbRefs = dbRefs;
   }
@@ -52,7 +59,7 @@ public class EmblFeature
   /**
    * @return the locations
    */
-  public Vector getLocations()
+  public Vector<EmblFeatureLocations> getLocations()
   {
     return locations;
   }
@@ -61,7 +68,7 @@ public class EmblFeature
    * @param locations
    *          the locations to set
    */
-  public void setLocations(Vector locations)
+  public void setLocations(Vector<EmblFeatureLocations> locations)
   {
     this.locations = locations;
   }
@@ -86,7 +93,7 @@ public class EmblFeature
   /**
    * @return the qualifiers
    */
-  public Vector getQualifiers()
+  public Vector<Qualifier> getQualifiers()
   {
     return qualifiers;
   }
@@ -95,7 +102,7 @@ public class EmblFeature
    * @param qualifiers
    *          the qualifiers to set
    */
-  public void setQualifiers(Vector qualifiers)
+  public void setQualifiers(Vector<Qualifier> qualifiers)
   {
     this.qualifiers = qualifiers;
   }
index 10c3634..134ce9e 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+/**
+ * Data model for a feature/location/locationElement read from an EMBL query
+ * reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblFeatureLocElement
 {
   String type;
index 41ba739..eb0bee7 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
-import java.util.Enumeration;
 import java.util.Vector;
 
+/**
+ * Data model for a &lt;location&gt; child element of a &lt;feature&gt; read
+ * from an EMBL query reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblFeatureLocations
 {
-  Vector locElements;
+  Vector<EmblFeatureLocElement> locElements;
 
   String locationType;
 
@@ -68,7 +73,7 @@ public class EmblFeatureLocations
   /**
    * @return the locElements
    */
-  public Vector getLocElements()
+  public Vector<EmblFeatureLocElement> getLocElements()
   {
     return locElements;
   }
@@ -77,7 +82,7 @@ public class EmblFeatureLocations
    * @param locElements
    *          the locElements to set
    */
-  public void setLocElements(Vector locElements)
+  public void setLocElements(Vector<EmblFeatureLocElement> locElements)
   {
     this.locElements = locElements;
   }
@@ -110,12 +115,10 @@ public class EmblFeatureLocations
   {
     int sepos = 0;
     int[] se = new int[locElements.size() * 2];
-    if (locationType.equalsIgnoreCase("single"))
+    if (locationType.equalsIgnoreCase("single")) // TODO: or "simple" ?
     {
-      for (Enumeration le = locElements.elements(); le.hasMoreElements();)
+      for (EmblFeatureLocElement loce : locElements)
       {
-        EmblFeatureLocElement loce = (EmblFeatureLocElement) le
-                .nextElement();
         if (accession == null || loce.accession != null
                 && accession.equals(loce.accession))
         {
@@ -130,10 +133,8 @@ public class EmblFeatureLocations
     }
     else if (locationType.equalsIgnoreCase("join"))
     {
-      for (Enumeration le = locElements.elements(); le.hasMoreElements();)
+      for (EmblFeatureLocElement loce : locElements)
       {
-        EmblFeatureLocElement loce = (EmblFeatureLocElement) le
-                .nextElement();
         if (accession == null || loce.accession != null
                 && accession.equals(loce.accession))
         {
@@ -150,13 +151,17 @@ public class EmblFeatureLocations
     else if (locationType != null)
     {
       if (jalview.bin.Cache.log != null)
+      {
         jalview.bin.Cache.log
                 .error("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
                         + locationType + "'");
+      }
       else
+      {
         System.err
                 .println("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
                         + locationType + "'");
+      }
     }
     // trim range if necessary.
     if (se != null && sepos != se.length)
index 3ca3755..2129054 100644 (file)
@@ -22,22 +22,31 @@ package jalview.datamodel.xdb.embl;
 
 import java.io.File;
 import java.io.FileReader;
+import java.io.PrintWriter;
 import java.io.Reader;
 import java.util.Vector;
 
 import org.exolab.castor.mapping.Mapping;
 import org.exolab.castor.xml.Unmarshaller;
 
+/**
+ * Data model for entries returned from an EMBL query, as unmarshalled by
+ * Castor using a binding file
+ * 
+ * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblFile
 {
-  Vector entries;
+  Vector<EmblEntry> entries;
 
-  Vector errors;
+  Vector<EmblError> errors;
 
   /**
    * @return the entries
    */
-  public Vector getEntries()
+  public Vector<EmblEntry> getEntries()
   {
     return entries;
   }
@@ -46,7 +55,7 @@ public class EmblFile
    * @param entries
    *          the entries to set
    */
-  public void setEntries(Vector entries)
+  public void setEntries(Vector<EmblEntry> entries)
   {
     this.entries = entries;
   }
@@ -54,7 +63,7 @@ public class EmblFile
   /**
    * @return the errors
    */
-  public Vector getErrors()
+  public Vector<EmblError> getErrors()
   {
     return errors;
   }
@@ -63,7 +72,7 @@ public class EmblFile
    * @param errors
    *          the errors to set
    */
-  public void setErrors(Vector errors)
+  public void setErrors(Vector<EmblError> errors)
   {
     this.errors = errors;
   }
@@ -77,7 +86,9 @@ public class EmblFile
   public static EmblFile getEmblFile(File file)
   {
     if (file == null)
+    {
       return null;
+    }
     try
     {
       return EmblFile.getEmblFile(new FileReader(file));
@@ -96,6 +107,7 @@ public class EmblFile
     {
       // 1. Load the mapping information from the file
       Mapping map = new Mapping(record.getClass().getClassLoader());
+
       java.net.URL url = record.getClass().getResource("/embl_mapping.xml");
       map.loadMapping(url);
 
@@ -104,17 +116,19 @@ public class EmblFile
       try
       {
         // uncomment to DEBUG EMBLFile reading
-        if (((String) jalview.bin.Cache.getDefault(
-                jalview.bin.Cache.CASTORLOGLEVEL, "debug"))
+        if (jalview.bin.Cache.getDefault(
+                jalview.bin.Cache.CASTORLOGLEVEL, "debug")
                 .equalsIgnoreCase("DEBUG"))
+        {
           unmar.setDebug(jalview.bin.Cache.log.isDebugEnabled());
+        }
       } catch (Exception e)
       {
       }
-      ;
       unmar.setIgnoreExtraElements(true);
+      unmar.setIgnoreExtraAttributes(true);
       unmar.setMapping(map);
-
+      unmar.setLogWriter(new PrintWriter(System.out));
       record = (EmblFile) unmar.unmarshal(file);
     } catch (Exception e)
     {
@@ -124,23 +138,4 @@ public class EmblFile
 
     return record;
   }
-
-  public static void main(String args[])
-  {
-    File mf = null;
-    if (args.length == 1)
-    {
-      mf = new File(args[0]);
-    }
-    if (!mf.exists())
-    {
-      mf = new File(
-              "C:\\Documents and Settings\\JimP\\workspace-3.2\\Jalview Release\\schemas\\embleRecordV1.1.xml");
-    }
-    EmblFile myfile = EmblFile.getEmblFile(mf);
-    if (myfile != null && myfile.entries != null
-            && myfile.entries.size() > 0)
-      System.out.println(myfile.entries.size() + " Records read. (" + mf
-              + ")");
-  }
 }
index 406ef2e..2a6fa84 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+/**
+ * Data model for the sequence extracted from an EMBL query reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class EmblSequence
 {
   String version;
index 9ab7f38..851dd48 100644 (file)
  */
 package jalview.datamodel.xdb.embl;
 
+/**
+ * Data model for a &lt;qualifier&gt; child element of a &lt;feature&gt; read
+ * from an EMBL query reply
+ * 
+ * @see embl_mapping.xml
+ */
 public class Qualifier
 {
   String name;
@@ -64,6 +70,7 @@ public class Qualifier
 
   public void addEvidence(String qevidence)
   {
+    // TODO - not used? can remove?
     if (evidence == null)
     {
       evidence = new String[1];
@@ -79,6 +86,7 @@ public class Qualifier
 
   public void addValues(String value)
   {
+    // TODO - not used? can remove?
     if (values == null)
     {
       values = new String[1];
index 58ee42d..92e863a 100644 (file)
  */
 package jalview.ws.dbsources;
 
+import java.io.File;
+
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceI;
 import jalview.datamodel.xdb.embl.EmblEntry;
+import jalview.datamodel.xdb.embl.EmblFile;
 import jalview.util.MessageManager;
 import jalview.ws.ebi.EBIFetchClient;
 
-import java.io.File;
-import java.util.Iterator;
-
 public abstract class EmblXmlSource extends EbiFileRetrievedProxy
 {
 
   /**
    * Last properly parsed embl file.
    */
-  public jalview.datamodel.xdb.embl.EmblFile efile = null;
+  public EmblFile efile = null;
 
   public EmblXmlSource()
   {
@@ -94,7 +94,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
       file = reply.getAbsolutePath();
       if (reply.length() > 25)
       {
-        efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply);
+        efile = EmblFile.getEmblFile(reply);
       }
       else
       {
@@ -103,19 +103,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
     }
     if (efile != null)
     {
-      for (Iterator i = efile.getEntries().iterator(); i.hasNext();)
+      for (EmblEntry entry : efile.getEntries())
       {
-        EmblEntry entry = (EmblEntry) i.next();
-        SequenceI[] seqparts = entry.getSequences(false, true, emprefx); // TODO:
-        // use
-        // !fetchNa,!fetchPeptide
-        // here
-        // instead
-        // -
-        // see
-        // todo
-        // in
-        // emblEntry
+        SequenceI[] seqparts = entry.getSequences(false, true, emprefx);
+        // TODO: use !fetchNa,!fetchPeptide here instead - see todo in EmblEntry
         if (seqparts != null)
         {
           SequenceI[] newseqs = null;
@@ -136,8 +127,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy
           }
           for (int j = 0; j < seqparts.length; si++, j++)
           {
-            newseqs[si] = seqparts[j].deriveSequence(); // place DBReferences on
-            // dataset and refer
+            newseqs[si] = seqparts[j].deriveSequence();
+            // place DBReferences on dataset and refer
           }
           seqs = newseqs;
 
index f0e5de0..6a871ee 100644 (file)
@@ -22,7 +22,7 @@ package jalview.ws.dbsources;
 
 import java.io.File;
 import java.io.FileReader;
-import java.util.Enumeration;
+import java.io.Reader;
 import java.util.Vector;
 
 import org.exolab.castor.xml.Unmarshaller;
@@ -48,6 +48,16 @@ import jalview.ws.seqfetcher.DbSourceProxyImpl;
  */
 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
 {
+
+  private static final String BAR_DELIMITER = "|";
+
+  private static final String NEWLINE = "\n";
+
+  private static org.exolab.castor.mapping.Mapping map;
+
+  /**
+   * Constructor
+   */
   public Uniprot()
   {
     super();
@@ -96,11 +106,15 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
     return "0"; // we really don't know what version we're on.
   }
 
-  private EBIFetchClient ebi = null;
-
-  private static org.exolab.castor.mapping.Mapping map;
-
-  public Vector getUniprotEntries(File file)
+  /**
+   * Reads a file containing the reply to the EBI Fetch Uniprot data query,
+   * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry
+   * data models (mapped from &lt;entry&gt; elements)
+   * 
+   * @param fileReader
+   * @return
+   */
+  public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)
   {
     UniprotFile uni = new UniprotFile();
     try
@@ -118,9 +132,9 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
       Unmarshaller unmar = new Unmarshaller(uni);
       unmar.setIgnoreExtraElements(true);
       unmar.setMapping(map);
-      if (file != null)
+      if (fileReader != null)
       {
-        uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
+        uni = (UniprotFile) unmar.unmarshal(fileReader);
       }
     } catch (Exception e)
     {
@@ -143,48 +157,28 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
       queries = queries.toUpperCase().replaceAll(
               "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
       Alignment al = null;
-      ebi = new EBIFetchClient();
-      StringBuffer result = new StringBuffer();
+      EBIFetchClient ebi = new EBIFetchClient();
       // uniprotxml parameter required since december 2007
       // uniprotkb dbname change introduced december 2008
       File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
               null);
-      Vector entries = getUniprotEntries(file);
+      Vector<UniprotEntry> entries = getUniprotEntries(new FileReader(file));
 
       if (entries != null)
       {
+        /*
+         * If Castor binding included sequence@length, we could guesstimate the
+         * size of buffer to hold the alignment
+         */
+        StringBuffer result = new StringBuffer(128);
         // First, make the new sequences
-        Enumeration en = entries.elements();
-        while (en.hasMoreElements())
+        for (UniprotEntry entry : entries)
         {
-          UniprotEntry entry = (UniprotEntry) en.nextElement();
-
-          StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
-          Enumeration en2 = entry.getAccession().elements();
-          while (en2.hasMoreElements())
-          {
-            name.append("|");
-            name.append(en2.nextElement());
-          }
-          en2 = entry.getName().elements();
-          while (en2.hasMoreElements())
-          {
-            name.append("|");
-            name.append(en2.nextElement());
-          }
-
-          if (entry.getProtein() != null
-                  && entry.getProtein().getName() != null)
-          {
-            for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++)
-            {
-              name.append(" " + entry.getProtein().getName().elementAt(nm));
-            }
-          }
-
-          result.append(name + "\n"
-                  + entry.getUniprotSequence().getContent() + "\n");
+          StringBuilder name = constructSequenceFastaHeader(entry);
 
+          result.append(name).append(NEWLINE)
+                  .append(entry.getUniprotSequence().getContent())
+                  .append(NEWLINE);
         }
 
         // Then read in the features and apply them to the dataset
@@ -209,63 +203,95 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
   }
 
   /**
+   * Construct a Fasta-format sequence header by concatenating the source,
+   * accession id(s) and name(s), delimited by '|', plus any protein names, now
+   * with space rather than bar delimiter
+   * 
+   * @param entry
+   * @return
+   */
+  public static StringBuilder constructSequenceFastaHeader(
+          UniprotEntry entry)
+  {
+    StringBuilder name = new StringBuilder(32);
+    name.append(">UniProt/Swiss-Prot");
+    for (String accessionId : entry.getAccession())
+    {
+      name.append(BAR_DELIMITER);
+      name.append(accessionId);
+    }
+    for (String n : entry.getName())
+    {
+      name.append(BAR_DELIMITER);
+      name.append(n);
+    }
+
+    if (entry.getProtein() != null
+            && entry.getProtein().getName() != null)
+    {
+      for (String nm : entry.getProtein().getName())
+      {
+        name.append(" ").append(nm);
+      }
+    }
+    return name;
+  }
+
+  /**
    * add an ordered set of UniprotEntry objects to an ordered set of sequences.
    * 
    * @param al
    *          - a sequence of n sequences
    * @param entries
-   *          a seuqence of n uniprot entries to be analysed.
+   *          a list of n uniprot entries to be analysed.
    */
-  public void addUniprotXrefs(Alignment al, Vector entries)
+  public void addUniprotXrefs(Alignment al, Vector<UniprotEntry> entries)
   {
+    final String dbVersion = getDbVersion();
+
     for (int i = 0; i < entries.size(); i++)
     {
-      UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
-      Enumeration e = entry.getDbReference().elements();
-      Vector<PDBEntry> onlyPdbEntries = new Vector();
-      Vector dbxrefs = new Vector();
-      while (e.hasMoreElements())
+      UniprotEntry entry = entries.elementAt(i);
+      Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
+      Vector<DBRefEntry> dbxrefs = new Vector<DBRefEntry>();
+
+      for (PDBEntry pdb : entry.getDbReference())
       {
-        PDBEntry pdb = (PDBEntry) e.nextElement();
         DBRefEntry dbr = new DBRefEntry();
         dbr.setSource(pdb.getType());
         dbr.setAccessionId(pdb.getId());
-        dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion());
+        dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
         dbxrefs.addElement(dbr);
-        if (!pdb.getType().equals("PDB"))
+        if ("PDB".equals(pdb.getType()))
         {
-          continue;
+          onlyPdbEntries.addElement(pdb);
         }
-
-        onlyPdbEntries.addElement(pdb);
       }
+
       SequenceI sq = al.getSequenceAt(i);
       while (sq.getDatasetSequence() != null)
       {
         sq = sq.getDatasetSequence();
       }
 
-      Enumeration en2 = entry.getAccession().elements();
-      while (en2.hasMoreElements())
+      for (String accessionId : entry.getAccession())
       {
-        // we always add as uniprot if we retrieved from uniprot or uniprot name
-        sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2
-                .nextElement().toString()));
+        /*
+         * add as uniprot whether retrieved from uniprot or uniprot_name
+         */
+        sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
+                accessionId));
       }
-      en2 = dbxrefs.elements();
-      while (en2.hasMoreElements())
-      {
-        // we always add as uniprot if we retrieved from uniprot or uniprot name
-        sq.addDBRef((DBRefEntry) en2.nextElement());
 
+      for (DBRefEntry dbRef : dbxrefs)
+      {
+        sq.addDBRef(dbRef);
       }
       sq.setPDBId(onlyPdbEntries);
       if (entry.getFeature() != null)
       {
-        e = entry.getFeature().elements();
-        while (e.hasMoreElements())
+        for (SequenceFeature sf : entry.getFeature())
         {
-          SequenceFeature sf = (SequenceFeature) e.nextElement();
           sf.setFeatureGroup("Uniprot");
           sq.addSequenceFeature(sf);
         }
index cb61cd4..a785899 100644 (file)
  */
 package jalview.ws.ebi;
 
-import jalview.util.MessageManager;
-
 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.StringTokenizer;
 
+import jalview.util.MessageManager;
+
 /**
  * DOCUMENT ME!
  * 
@@ -83,6 +85,18 @@ public class EBIFetchClient
     throw new Error(MessageManager.getString("error.not_yet_implemented"));
   }
 
+  /**
+   * Send an HTTP fetch request to EBI and save the reply in a temporary file.
+   * 
+   * @param ids
+   *          the query formatted as db:query1;query2;query3
+   * @param f
+   *          the format wanted
+   * @param s
+   *          - unused parameter
+   * @return the file holding the response
+   * @throws OutOfMemoryError
+   */
   public File fetchDataAsFile(String ids, String f, String s)
           throws OutOfMemoryError
   {
@@ -111,7 +125,7 @@ public class EBIFetchClient
    * @param f
    *          raw/xml
    * @param s
-   *          ?
+   *          not used - remove?
    * 
    * @return Raw string array result of query set
    */
@@ -180,7 +194,7 @@ public class EBIFetchClient
               + db.toLowerCase() + "/" + ids.toLowerCase()
               + (f != null ? "/" + f : ""));
 
-      BufferedInputStream is = new BufferedInputStream(rcall.openStream());
+      InputStream is = new BufferedInputStream(rcall.openStream());
       if (outFile != null)
       {
         FileOutputStream fio = new FileOutputStream(outFile);
@@ -197,7 +211,7 @@ public class EBIFetchClient
       {
         BufferedReader br = new BufferedReader(new InputStreamReader(is));
         String rtn;
-        ArrayList<String> arl = new ArrayList<String>();
+        List<String> arl = new ArrayList<String>();
         while ((rtn = br.readLine()) != null)
         {
           arl.add(rtn);
index 9ef1b9a..6c06955 100644 (file)
@@ -22,6 +22,7 @@ package jalview.analysis;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
@@ -89,13 +90,16 @@ public class AlignmentUtilsTests
           "GGGTCAGGCAGT\n";
   // @formatter:on
 
-  public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+  // public static Sequence ts=new
+  // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+  public static Sequence ts = new Sequence("short",
+          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
 
   @Test
-  public void testExpandFlanks()
+  public void testExpandContext()
   {
     AlignmentI al = new Alignment(new Sequence[] {});
-    for (int i=4;i<14;i+=3)
+    for (int i = 4; i < 14; i += 2)
     {
       SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7);
       al.addSequence(s1);
@@ -103,18 +107,135 @@ public class AlignmentUtilsTests
     System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true));
     for (int flnk=-1;flnk<25; flnk++)
     {
-      AlignmentI exp;
-      System.out.println("\nFlank size: "+flnk);
-      System.out.println(new AppletFormatAdapter().formatSequences("Clustal", exp=AlignmentUtils.expandContext(al, flnk), true));
-      if (flnk==-1) {
-        for (SequenceI sq:exp.getSequences())
+      AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
+      System.out.println("\nFlank size: " + flnk);
+      System.out.println(new AppletFormatAdapter().formatSequences(
+              "Clustal", exp, true));
+      if (flnk == -1)
       {
+        /*
+         * Full expansion to complete sequences
+         */
+        for (SequenceI sq : exp.getSequences())
+        {
           String ung = sq.getSequenceAsString().replaceAll("-+", "");
-          assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
+          final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
+                  + ung
+                  + "\n"
+                  + sq.getDatasetSequence().getSequenceAsString();
+          assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
+                  .getSequenceAsString()));
+        }
       }
+      else if (flnk == 24)
+      {
+        /*
+         * Last sequence is fully expanded, others have leading gaps to match
+         */
+        assertTrue(exp.getSequenceAt(4).getSequenceAsString()
+                .startsWith("abc"));
+        assertTrue(exp.getSequenceAt(3).getSequenceAsString()
+                .startsWith("--abc"));
+        assertTrue(exp.getSequenceAt(2).getSequenceAsString()
+                .startsWith("----abc"));
+        assertTrue(exp.getSequenceAt(1).getSequenceAsString()
+                .startsWith("------abc"));
+        assertTrue(exp.getSequenceAt(0).getSequenceAsString()
+                .startsWith("--------abc"));
       }
     }
-    }    
+  }
+
+  /**
+   * Test that annotations are correctly adjusted by expandContext
+   */
+  @Test
+  public void testExpandContext_annotation()
+  {
+    AlignmentI al = new Alignment(new Sequence[]
+    {});
+    SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
+    // subsequence DEF:
+    SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
+    al.addSequence(seq1);
+
+    /*
+     * Annotate DEF with 4/5/6 respectively
+     */
+    Annotation[] anns = new Annotation[]
+    { new Annotation(4), new Annotation(5), new Annotation(6) };
+    AlignmentAnnotation ann = new AlignmentAnnotation("SS",
+            "secondary structure", anns);
+    seq1.addAlignmentAnnotation(ann);
+
+    /*
+     * The annotations array should match aligned positions
+     */
+    assertEquals(3, ann.annotations.length);
+    assertEquals(4, ann.annotations[0].value, 0.001);
+    assertEquals(5, ann.annotations[1].value, 0.001);
+    assertEquals(6, ann.annotations[2].value, 0.001);
+
+    /*
+     * Check annotation to sequence position mappings before expanding the
+     * sequence; these are set up in Sequence.addAlignmentAnnotation ->
+     * Annotation.setSequenceRef -> createSequenceMappings
+     */
+    assertNull(ann.getAnnotationForPosition(1));
+    assertNull(ann.getAnnotationForPosition(2));
+    assertNull(ann.getAnnotationForPosition(3));
+    assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+    assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+    assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+    assertNull(ann.getAnnotationForPosition(7));
+    assertNull(ann.getAnnotationForPosition(8));
+    assertNull(ann.getAnnotationForPosition(9));
+
+    /*
+     * Expand the subsequence to the full sequence abcDEFghi
+     */
+    AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
+    // FIXME expandContext adds an unnecessary gap; need tests to cover all
+    // cases for which 'maxOffset' is computed
+    assertEquals("-abcDEFghi", expanded.getSequenceAt(0)
+            .getSequenceAsString());
+
+    /*
+     * Confirm the alignment and sequence have the same SS annotation,
+     * referencing the expanded sequence
+     */
+    ann = expanded.getSequenceAt(0).getAnnotation()[0];
+    assertSame(ann, expanded.getAlignmentAnnotation()[0]);
+    assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
+
+    /*
+     * The annotations array should have null values except for annotated
+     * positions
+     */
+    assertNull(ann.annotations[0]);
+    assertNull(ann.annotations[1]);
+    assertNull(ann.annotations[2]);
+    assertNull(ann.annotations[3]);
+    assertEquals(4, ann.annotations[4].value, 0.001);
+    assertEquals(5, ann.annotations[5].value, 0.001);
+    assertEquals(6, ann.annotations[6].value, 0.001);
+    assertNull(ann.annotations[7]);
+    assertNull(ann.annotations[8]);
+    assertNull(ann.annotations[9]);
+
+    /*
+     * sequence position mappings should be unchanged
+     */
+    assertNull(ann.getAnnotationForPosition(1));
+    assertNull(ann.getAnnotationForPosition(2));
+    assertNull(ann.getAnnotationForPosition(3));
+    assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+    assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+    assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+    assertNull(ann.getAnnotationForPosition(7));
+    assertNull(ann.getAnnotationForPosition(8));
+    assertNull(ann.getAnnotationForPosition(9));
+  }
 
   /**
    * Test method that returns a map of lists of sequences by sequence name.
index f23b3d2..7c1aa81 100644 (file)
@@ -1,11 +1,13 @@
 package jalview.datamodel;
 
 import static org.junit.Assert.assertEquals;
-import jalview.analysis.AlignSeq;
-import jalview.io.AppletFormatAdapter;
+import static org.junit.Assert.assertNull;
 
 import org.junit.Test;
 
+import jalview.analysis.AlignSeq;
+import jalview.io.AppletFormatAdapter;
+
 public class AlignmentAnnotationTests
 {
   @Test
@@ -153,7 +155,48 @@ public class AlignmentAnnotationTests
                       : "Out of range");
       assertEquals("Position " + p + " " + alm1 + " " + alm2, alm1, alm2);
     }
-    // new jalview.io.FormatAdapter().formatSequences("STOCKHOLM", n)
   }
 
+  @Test
+  public void testAdjustForAlignment()
+  {
+    SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
+    seq.createDatasetSequence();
+
+    /*
+     * Annotate positions 3/4/5 (CDE) with values 1/2/3
+     */
+    Annotation[] anns = new Annotation[]
+    { null, null, new Annotation(1), new Annotation(2), new Annotation(3) };
+    AlignmentAnnotation ann = new AlignmentAnnotation("SS",
+            "secondary structure", anns);
+    seq.addAlignmentAnnotation(ann);
+
+    /*
+     * Check annotation map before modifying aligned sequence
+     */
+    assertNull(ann.getAnnotationForPosition(1));
+    assertNull(ann.getAnnotationForPosition(2));
+    assertNull(ann.getAnnotationForPosition(6));
+    assertNull(ann.getAnnotationForPosition(7));
+    assertEquals(1, ann.getAnnotationForPosition(3).value, 0.001d);
+    assertEquals(2, ann.getAnnotationForPosition(4).value, 0.001d);
+    assertEquals(3, ann.getAnnotationForPosition(5).value, 0.001d);
+
+    /*
+     * Trim the displayed sequence to BCD and adjust annotations
+     */
+    seq.setSequence("BCD");
+    seq.setStart(2);
+    seq.setEnd(4);
+    ann.adjustForAlignment();
+
+    /*
+     * Should now have annotations for aligned positions 2, 3 (CD) only
+     */
+    assertEquals(3, ann.annotations.length);
+    assertNull(ann.annotations[0]);
+    assertEquals(1, ann.annotations[1].value, 0.001);
+    assertEquals(2, ann.annotations[2].value, 0.001);
+  }
 }
index ea23bfe..5e73bbc 100644 (file)
@@ -310,4 +310,90 @@ public class SequenceTest
     seq.getDatasetSequence().setDatasetSequence(seq); // loop!
     assertNull(seq.getSequenceFeatures());
   }
+
+  /**
+   * Test the method that returns an array, indexed by sequence position, whose
+   * entries are the residue positions at the sequence position (or to the right
+   * if a gap)
+   */
+  @Test
+  public void testFindPositionMap()
+  {
+    /*
+     * Note: Javadoc for findPosition says it returns the residue position to
+     * the left of a gapped position; in fact it returns the position to the
+     * right. Also it returns a non-existent residue position for a gap beyond
+     * the sequence.
+     */
+    Sequence seq = new Sequence("TestSeq", "AB.C-D E.");
+    int[] map = seq.findPositionMap();
+    assertEquals(Arrays.toString(new int[]
+    { 1, 2, 3, 3, 4, 4, 5, 5, 6 }), Arrays.toString(map));
+  }
+
+  /**
+   * Test for getSubSequence
+   */
+  @Test
+  public void testGetSubsequence()
+  {
+    SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
+    seq.createDatasetSequence();
+
+    // positions are base 0, end position is exclusive
+    SequenceI subseq = seq.getSubSequence(2, 4);
+
+    assertEquals("CD", subseq.getSequenceAsString());
+    // start/end are base 1 positions
+    assertEquals(3, subseq.getStart());
+    assertEquals(4, subseq.getEnd());
+    // subsequence shares the full dataset sequence
+    assertSame(seq.getDatasetSequence(), subseq.getDatasetSequence());
+  }
+
+  /**
+   * Test for deriveSequence applied to a sequence with a dataset
+   */
+  @Test
+  public void testDeriveSequence_existingDataset()
+  {
+    SequenceI seq = new Sequence("Seq1", "CD");
+    seq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
+    seq.setStart(3);
+    seq.setEnd(4);
+    SequenceI derived = seq.deriveSequence();
+    assertEquals("CD", derived.getSequenceAsString());
+    assertSame(seq.getDatasetSequence(), derived.getDatasetSequence());
+  }
+
+  /**
+   * Test for deriveSequence applied to an ungapped sequence with no dataset
+   */
+  @Test
+  public void testDeriveSequence_noDatasetUngapped()
+  {
+    SequenceI seq = new Sequence("Seq1", "ABCDEF");
+    assertEquals(1, seq.getStart());
+    assertEquals(6, seq.getEnd());
+    SequenceI derived = seq.deriveSequence();
+    assertEquals("ABCDEF", derived.getSequenceAsString());
+    assertEquals("ABCDEF", derived.getDatasetSequence()
+            .getSequenceAsString());
+  }
+
+  /**
+   * Test for deriveSequence applied to a gapped sequence with no dataset
+   */
+  @Test
+  public void testDeriveSequence_noDatasetGapped()
+  {
+    SequenceI seq = new Sequence("Seq1", "AB-C.D EF");
+    assertEquals(1, seq.getStart());
+    assertEquals(6, seq.getEnd());
+    assertNull(seq.getDatasetSequence());
+    SequenceI derived = seq.deriveSequence();
+    assertEquals("AB-C.D EF", derived.getSequenceAsString());
+    assertEquals("ABCDEF", derived.getDatasetSequence()
+            .getSequenceAsString());
+  }
 }
diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java
new file mode 100644 (file)
index 0000000..e89245c
--- /dev/null
@@ -0,0 +1,127 @@
+package jalview.datamodel.xdb.embl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.StringReader;
+import java.util.Vector;
+
+import org.junit.Test;
+
+import jalview.datamodel.DBRefEntry;
+
+public class EmblFileTest
+{
+  // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+  private static final String TESTDATA = 
+          "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+          + "<EMBL_Services>"
+          + "<entry accession=\"X53828\" version=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\">"
+          + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
+          + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
+          + "<dbreference db=\"EuropePMC\" primary=\"PMC1460223\" secondary=\"9649548\" />"
+          + "<feature name=\"CDS\"><dbreference db=\"GOA\" primary=\"P00340\" secondary=\"2.1\" /><dbreference db=\"InterPro\" primary=\"IPR001236\" />"
+          + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
+          + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
+          + "<location type=\"single\" complement=\"true\">"
+              + "<locationElement type=\"range\" accession=\"X53828\" version=\"1\" complement=\"false\">"
+          + "<basePosition type=\"simple\">60</basePosition><basePosition type=\"join\">1058</basePosition>"
+              + "</locationElement></location></feature>"
+          + "<sequence type=\"mRNA\" version=\"2\">GTGACG</sequence></entry></EMBL_Services>";
+
+  @Test
+  public void testGetEmblFile()
+  {
+    Vector<EmblEntry> entries = EmblFile.getEmblFile(
+            new StringReader(TESTDATA)).getEntries();
+    assertEquals(1, entries.size());
+    EmblEntry entry = entries.get(0);
+
+    assertEquals("X53828", entry.getAccession());
+    assertEquals(
+            "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
+            entry.getDesc());
+    assertEquals("2005-04-18", entry.getLastUpdated());
+
+    /*
+     * FIXME these assertions fail - values are null - why?? Adding or removing
+     * attributes in the test XML modifies behaviour. eg. inserting an attribute
+     * _before_ lastUpdated results in a null value in this field.
+     */
+    // assertEquals("25", entry.getRCreated());
+    // assertEquals("83", entry.getRLastUpdated());
+
+    assertEquals(2, entry.getKeywords().size());
+    assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0));
+    assertEquals("chutney", entry.getKeywords().get(1));
+
+    /*
+     * dbrefs
+     */
+    assertEquals(1, entry.getDbRefs().size());
+    DBRefEntry dbref = entry.getDbRefs().get(0);
+    assertEquals("EuropePMC", dbref.getSource());
+    assertEquals("PMC1460223", dbref.getAccessionId());
+    assertEquals("9649548", dbref.getVersion());
+
+    /*
+     * sequence features
+     */
+    assertEquals(1, entry.getFeatures().size());
+    EmblFeature ef = entry.getFeatures().get(0);
+    assertEquals("CDS", ef.getName());
+    assertEquals(2, ef.getDbRefs().size());
+    dbref = ef.getDbRefs().get(0);
+    assertEquals("GOA", dbref.getSource());
+    assertEquals("P00340", dbref.getAccessionId());
+    assertEquals("2.1", dbref.getVersion());
+    dbref = ef.getDbRefs().get(1);
+    assertEquals("InterPro", dbref.getSource());
+    assertEquals("IPR001236", dbref.getAccessionId());
+    assertEquals("", dbref.getVersion());
+    assertEquals(2, ef.getQualifiers().size());
+
+    // feature qualifiers
+    Qualifier q = ef.getQualifiers().get(0);
+    assertEquals("note", q.getName());
+    assertEquals(2, q.getValues().length);
+    assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]);
+    assertEquals("pickle", q.getValues()[1]);
+    assertNull(q.getEvidence());
+    q = ef.getQualifiers().get(1);
+    assertEquals("translation", q.getName());
+    assertEquals(1, q.getValues().length);
+    assertEquals("MSLKDHLIHN", q.getValues()[0]);
+    assertEquals(1, q.getEvidence().length);
+    assertEquals("Keith", q.getEvidence()[0]);
+
+    // feature locations
+    assertEquals(1, ef.getLocations().size());
+    EmblFeatureLocations fl = ef.getLocations().get(0);
+    assertEquals("single", fl.getLocationType());
+    assertTrue(fl.isLocationComplement());
+    assertEquals(1, fl.getLocElements().size());
+    EmblFeatureLocElement le = fl.getLocElements().get(0);
+    assertEquals("range", le.getType());
+    assertEquals("X53828", le.getAccession());
+    assertEquals("1", le.getVersion());
+    assertFalse(le.isComplement());
+    assertEquals(2, le.getBasePositions().length);
+    BasePosition bp = le.getBasePositions()[0];
+    assertEquals("simple", bp.getType());
+    assertEquals("60", bp.getPos());
+    bp = le.getBasePositions()[1];
+    assertEquals("join", bp.getType());
+    assertEquals("1058", bp.getPos());
+
+    /*
+     * Sequence
+     */
+    EmblSequence seq = entry.getSequence();
+    assertEquals("mRNA", seq.getType());
+    assertEquals("2", seq.getVersion());
+    assertEquals("GTGACG", seq.getSequence());
+  }
+}
diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java
new file mode 100644 (file)
index 0000000..7325527
--- /dev/null
@@ -0,0 +1,124 @@
+package jalview.ws.dbsources;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Vector;
+
+import org.junit.Test;
+
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.UniprotEntry;
+
+public class UniprotTest
+{
+  // test data adapted from http://www.uniprot.org/uniprot/A9CKP4.xml: a single entry with two accessions, two names, two protein full names, two dbReferences, three features and one sequence
+  private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
+          + "<uniprot>"
+          + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
+          + "<accession>A9CKP4</accession>"
+          + "<accession>A9CKP5</accession>"
+          + "<name>A9CKP4_AGRT5</name>"
+          + "<name>A9CKP4_AGRT6</name>"
+          + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName><fullName>Henry</fullName></recommendedName></protein>"
+          + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
+          + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
+          + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
+          + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
+          + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
+          + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
+          + "</uniprot>";
+
+  /**
+   * Test that getUniprotEntries unmarshals UNIPROT_XML to a single UniprotEntry with the expected names, accessions, sequence, protein names, features and cross-references
+   */
+  @Test
+  public void testGetUniprotEntries()
+  {
+    Uniprot u = new Uniprot();
+    Reader reader = new StringReader(UNIPROT_XML);
+    Vector<UniprotEntry> entries = u.getUniprotEntries(reader);
+    assertEquals(1, entries.size());
+    UniprotEntry entry = entries.get(0);
+    assertEquals(2, entry.getName().size());
+    assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
+    assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
+    assertEquals(2, entry.getAccession().size());
+    assertEquals("A9CKP4", entry.getAccession().get(0));
+    assertEquals("A9CKP5", entry.getAccession().get(1));
+
+    /*
+     * The sequence text in the XML contains a space (MHAPL VSKDL); UniprotSequence drops any space characters
+     */
+    assertEquals("MHAPLVSKDL", entry.getUniprotSequence()
+            .getContent());
+
+    assertEquals(2, entry.getProtein().getName().size());
+    assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
+            .getName().get(0));
+    assertEquals("Henry", entry.getProtein().getName().get(1));
+
+    /*
+     * Check the three sequence features, in XML order: type, description, status (first only), position, begin and end
+     */
+    Vector<SequenceFeature> features = entry.getFeature();
+    assertEquals(3, features.size());
+    SequenceFeature sf = features.get(0);
+    assertEquals("signal peptide", sf.getType());
+    assertNull(sf.getDescription());
+    assertNull(sf.getStatus());
+    assertEquals(1, sf.getPosition()); // same value as begin - wrong - Castor bug??
+    assertEquals(1, sf.getBegin());
+    assertEquals(18, sf.getEnd());
+    sf = features.get(1);
+    assertEquals("propeptide", sf.getType());
+    assertEquals("Activation peptide", sf.getDescription());
+    assertEquals(19, sf.getPosition()); // same value as begin - wrong - Castor bug??
+    assertEquals(19, sf.getBegin());
+    assertEquals(20, sf.getEnd());
+    sf = features.get(2);
+    assertEquals("chain", sf.getType());
+    assertEquals("Granzyme B", sf.getDescription());
+    assertEquals(21, sf.getPosition()); // same value as begin - wrong - Castor bug??
+    assertEquals(21, sf.getBegin());
+    assertEquals(247, sf.getEnd());
+
+    /*
+     * Check cross-references: the PDB entry carries two properties, the PDBsum entry has none (getProperty is null)
+     */
+    Vector<PDBEntry> xrefs = entry.getDbReference();
+    assertEquals(2, xrefs.size());
+
+    PDBEntry xref = xrefs.get(0);
+    assertEquals("2FSQ", xref.getId());
+    assertEquals("PDB", xref.getType());
+    assertEquals(2, xref.getProperty().size());
+    assertEquals("X-ray", xref.getProperty().get("method"));
+    assertEquals("1.40", xref.getProperty().get("resolution"));
+
+    xref = xrefs.get(1);
+    assertEquals("2FSR", xref.getId());
+    assertEquals("PDBsum", xref.getType());
+    assertNull(xref.getProperty());
+  }
+
+  /**
+   * Test that constructSequenceFastaHeader formats the sequence name for the entry parsed from UNIPROT_XML in Fasta style
+   */
+  @Test
+  public void testConstructSequenceFastaHeader()
+  {
+    Uniprot u = new Uniprot();
+    Reader reader = new StringReader(UNIPROT_XML);
+    Vector<UniprotEntry> entries = u.getUniprotEntries(reader);
+    UniprotEntry entry = entries.get(0);
+
+    // source, then accession ids and names (all |-separated), then a space and the space-separated protein names
+    String expectedName = ">UniProt/Swiss-Prot|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6 Mitogen-activated protein kinase 13 Henry";
+    assertEquals(expectedName, Uniprot.constructSequenceFastaHeader(entry)
+            .toString());
+  }
+}