Merge branch 'develop' into features/JAL-2446NCList
[jalview.git] / src / jalview / datamodel / xdb / embl / EmblEntry.java
index 3ba36ca..c3d4e66 100644 (file)
@@ -48,8 +48,7 @@ import java.util.regex.Pattern;
  * Data model for one entry returned from an EMBL query, as marshalled by a
  * Castor binding file
  * 
- * For example:
- * http://www.ebi.ac.uk/ena/data/view/J03321&display=xml
+ * For example: http://www.ebi.ac.uk/ena/data/view/J03321&display=xml
  * 
  * @see embl_mapping.xml
  */
@@ -200,7 +199,6 @@ public class EmblEntry
     retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
             new int[] { 1, dna.getLength() }, 1, 1));
 
-
     /*
      * transform EMBL Database refs to canonical form
      */
@@ -298,7 +296,8 @@ public class EmblEntry
         if (qname.equals("translation"))
         {
           // remove all spaces (precompiled String.replaceAll(" ", ""))
-          translation = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll("");
+          translation = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll(
+                  "");
         }
         else if (qname.equals("protein_id"))
         {
@@ -444,13 +443,27 @@ public class EmblEntry
       /*
        * add cds features to dna sequence
        */
-      for (int xint = 0; exons != null && xint < exons.length; xint += 2)
+      String cds = feature.getName(); // "CDS"
+      for (int xint = 0; exons != null && xint < exons.length - 1; xint += 2)
       {
-        SequenceFeature sf = makeCdsFeature(exons, xint, proteinName,
-                proteinId, vals, codonStart);
-        sf.setType(feature.getName()); // "CDS"
+        int exonStart = exons[xint];
+        int exonEnd = exons[xint + 1];
+        int begin = Math.min(exonStart, exonEnd);
+        int end = Math.max(exonStart, exonEnd);
+        int exonNumber = xint / 2 + 1;
+        String desc = String.format("Exon %d for protein '%s' EMBLCDS:%s",
+                exonNumber, proteinName, proteinId);
+
+        SequenceFeature sf = makeCdsFeature(cds, desc, begin, end,
+                sourceDb, vals);
+
         sf.setEnaLocation(feature.getLocation());
-        sf.setFeatureGroup(sourceDb);
+        boolean forwardStrand = exonStart <= exonEnd;
+        sf.setStrand(forwardStrand ? "+" : "-");
+        sf.setPhase(String.valueOf(codonStart - 1));
+        sf.setValue(FeatureProperties.EXONPOS, exonNumber);
+        sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
+
         dna.addSequenceFeature(sf);
       }
     }
@@ -469,13 +482,14 @@ public class EmblEntry
          */
         String source = DBRefUtils.getCanonicalName(ref.getSource());
         ref.setSource(source);
-        DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref
-                .getAccessionId());
+        DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(),
+                ref.getVersion(), ref.getAccessionId());
         if (source.equals(DBRefSource.UNIPROT))
         {
           String proteinSeqName = DBRefSource.UNIPROT + "|"
                   + ref.getAccessionId();
-          if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null)
+          if (dnaToProteinMapping != null
+                  && dnaToProteinMapping.getTo() != null)
           {
             if (mappingUsed)
             {
@@ -563,33 +577,25 @@ public class EmblEntry
   /**
    * Helper method to construct a SequenceFeature for one cds range
    * 
-   * @param exons
-   *          array of cds [start, end, ...] positions
-   * @param exonStartIndex
-   *          offset into the exons array
-   * @param proteinName
-   * @param proteinAccessionId
+   * @param type
+   *          feature type ("CDS")
+   * @param desc
+   *          description
+   * @param begin
+   *          start position
+   * @param end
+   *          end position
+   * @param group
+   *          feature group
    * @param vals
    *          map of 'miscellaneous values' for feature
-   * @param codonStart
-   *          codon start position for CDS (1/2/3, normally 1)
    * @return
    */
-  protected SequenceFeature makeCdsFeature(int[] exons, int exonStartIndex,
-          String proteinName, String proteinAccessionId,
-          Map<String, String> vals, int codonStart)
-  {
-    int exonNumber = exonStartIndex / 2 + 1;
-    SequenceFeature sf = new SequenceFeature();
-    sf.setBegin(Math.min(exons[exonStartIndex], exons[exonStartIndex + 1]));
-    sf.setEnd(Math.max(exons[exonStartIndex], exons[exonStartIndex + 1]));
-    sf.setDescription(String.format("Exon %d for protein '%s' EMBLCDS:%s",
-            exonNumber, proteinName, proteinAccessionId));
-    sf.setPhase(String.valueOf(codonStart - 1));
-    sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+"
-            : "-");
-    sf.setValue(FeatureProperties.EXONPOS, exonNumber);
-    sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
+  protected SequenceFeature makeCdsFeature(String type, String desc,
+          int begin, int end, String group, Map<String, String> vals)
+  {
+    SequenceFeature sf = new SequenceFeature(type, desc, begin, end, group);
+
     if (!vals.isEmpty())
     {
       StringBuilder sb = new StringBuilder();