JAL-2189 source formatting
[jalview.git] / src / jalview / ext / ensembl / EnsemblSeqProxy.java
index 8fb668a..9109fb2 100644 (file)
@@ -2,6 +2,7 @@ package jalview.ext.ensembl;
 
 import jalview.analysis.AlignmentUtils;
 import jalview.analysis.Dna;
+import jalview.bin.Cache;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -14,6 +15,7 @@ import jalview.io.FastaFile;
 import jalview.io.FileParse;
 import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
+import jalview.util.Comparison;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
 
@@ -158,6 +160,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
                 + ")";
         System.err.println(msg);
+        r.printStackTrace();
         break;
       }
     }
@@ -271,23 +274,63 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       proteinSeq.createDatasetSequence();
       querySeq.createDatasetSequence();
 
-      MapList mapList = AlignmentUtils.mapCdsToProtein(querySeq, proteinSeq);
+      MapList mapList = AlignmentUtils
+              .mapCdsToProtein(querySeq, proteinSeq);
       if (mapList != null)
       {
         // clunky: ensure Uniprot xref if we have one is on mapped sequence
         SequenceI ds = proteinSeq.getDatasetSequence();
-        ds.setSourceDBRef(proteinSeq.getSourceDBRef());
-
+        // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
         Mapping map = new Mapping(ds, mapList);
-        DBRefEntry dbr = new DBRefEntry(getDbSource(), getDbVersion(),
-                proteinSeq.getName(), map);
+        DBRefEntry dbr = new DBRefEntry(getDbSource(),
+                getEnsemblDataVersion(), proteinSeq.getName(), map);
         querySeq.getDatasetSequence().addDBRef(dbr);
-        
+        DBRefEntry[] uprots = DBRefUtils.selectRefs(ds.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        DBRefEntry[] upxrefs = DBRefUtils.selectRefs(querySeq.getDBRefs(),
+                new String[] { DBRefSource.UNIPROT });
+        if (uprots != null)
+        {
+          for (DBRefEntry up : uprots)
+          {
+            // locate local uniprot ref and map
+            List<DBRefEntry> upx = DBRefUtils.searchRefs(upxrefs,
+                    up.getAccessionId());
+            DBRefEntry upxref;
+            if (upx.size() != 0)
+            {
+              upxref = upx.get(0);
+
+              if (upx.size() > 1)
+              {
+                Cache.log
+                        .warn("Implementation issue - multiple uniprot acc on product sequence.");
+              }
+            }
+            else
+            {
+              upxref = new DBRefEntry(DBRefSource.UNIPROT,
+                      getEnsemblDataVersion(), up.getAccessionId());
+            }
+
+            Mapping newMap = new Mapping(ds, mapList);
+            upxref.setVersion(getEnsemblDataVersion());
+            upxref.setMap(newMap);
+            if (upx.size() == 0)
+            {
+              // add the new uniprot ref
+              querySeq.getDatasetSequence().addDBRef(upxref);
+            }
+
+          }
+        }
+
         /*
          * copy exon features to protein, compute peptide variants from dna 
          * variants and add as features on the protein sequence ta-da
          */
-        AlignmentUtils.computeProteinFeatures(querySeq, proteinSeq, mapList);
+        AlignmentUtils
+                .computeProteinFeatures(querySeq, proteinSeq, mapList);
       }
     } catch (Exception e)
     {
@@ -309,24 +352,19 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       seq = seq.getDatasetSequence();
     }
 
-    EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
+    EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(),
+            getEnsemblDataVersion());
     List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName());
     for (DBRefEntry xref : xrefs)
     {
       seq.addDBRef(xref);
-      /*
-       * Save any Uniprot xref to be the reference for SIFTS mapping
-       */
-      if (DBRefSource.UNIPROT.equals(xref.getSource()))
-      {
-        seq.setSourceDBRef(xref);
-      }
     }
 
     /*
      * and add a reference to itself
      */
-    DBRefEntry self = new DBRefEntry(getDbSource(), "0", seq.getName());
+    DBRefEntry self = new DBRefEntry(getDbSource(),
+            getEnsemblDataVersion(), seq.getName());
     seq.addDBRef(self);
   }
 
@@ -349,6 +387,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       throw new JalviewException("ENSEMBL Rest API not available.");
     }
     FileParse fp = getSequenceReader(ids);
+    if (fp == null)
+    {
+      return alignment;
+    }
+
     FastaFile fr = new FastaFile(fp);
     if (fr.hasWarningMessage())
     {
@@ -373,9 +416,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
     if (fr.getSeqs().size() > 0)
     {
-      AlignmentI seqal = new Alignment(
-              fr.getSeqsAsArray());
-      for (SequenceI sq:seqal.getSequences())
+      AlignmentI seqal = new Alignment(fr.getSeqsAsArray());
+      for (SequenceI sq : seqal.getSequences())
       {
         if (sq.getDescription() == null)
         {
@@ -385,7 +427,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         if (ids.contains(name)
                 || ids.contains(name.replace("ENSP", "ENST")))
         {
-          DBRefUtils.parseToDbRef(sq, DBRefSource.ENSEMBL, "0", name);
+          DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
+                  getEnsemblDataVersion(), name);
+          sq.addDBRef(dbref);
         }
       }
       if (alignment == null)
@@ -505,7 +549,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     int mappedLength = 0;
     int direction = 1; // forward
     boolean directionSet = false;
-  
+
     for (SequenceFeature sf : sfs)
     {
       /*
@@ -522,20 +566,20 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
           // abort - mix of forward and backward
           System.err.println("Error: forward and backward strand for "
                   + accId);
-            return null;
-          }
-          direction = strand;
-          directionSet = true;
-  
-          /*
-           * add to CDS ranges, semi-sorted forwards/backwards
-           */
-          if (strand < 0)
-          {
-            regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
-          }
-          else
-          {
+          return null;
+        }
+        direction = strand;
+        directionSet = true;
+
+        /*
+         * add to CDS ranges, semi-sorted forwards/backwards
+         */
+        if (strand < 0)
+        {
+          regions.add(0, new int[] { sf.getEnd(), sf.getBegin() });
+        }
+        else
+        {
           regions.add(new int[] { sf.getBegin(), sf.getEnd() });
         }
         mappedLength += Math.abs(sf.getEnd() - sf.getBegin() + 1);
@@ -550,7 +594,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         }
       }
     }
-  
+
     if (regions.isEmpty())
     {
       System.out.println("Failed to identify target sequence for " + accId
@@ -563,10 +607,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      * (havana / ensembl_havana)
      */
     Collections.sort(regions, new RangeSorter(direction == 1));
-  
+
     List<int[]> to = Arrays.asList(new int[] { start,
         start + mappedLength - 1 });
-  
+
     return new MapList(regions, to, 1, 1);
   }
 
@@ -610,12 +654,16 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     int start = sf.getBegin();
     int end = sf.getEnd();
     int[] mappedRange = mapping.locateInTo(start, end);
-  
+
     if (mappedRange != null)
     {
       SequenceFeature copy = new SequenceFeature(sf);
       copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
       copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
+      if (".".equals(copy.getFeatureGroup()))
+      {
+        copy.setFeatureGroup(getDbSource());
+      }
       targetSequence.addSequenceFeature(copy);
 
       /*
@@ -679,16 +727,21 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     {
       complement.append(",");
     }
-    if ("HGMD_MUTATION".equalsIgnoreCase(allele))
+
+    /*
+     * some 'alleles' are actually descriptive terms 
+     * e.g. HGMD_MUTATION, PhenCode_variation
+     * - we don't want to 'reverse complement' these
+     */
+    if (!Comparison.isNucleotideSequence(allele, true))
     {
       complement.append(allele);
     }
     else
     {
-      char[] alleles = allele.toCharArray();
-      for (int i = alleles.length - 1; i >= 0; i--)
+      for (int i = allele.length() - 1; i >= 0; i--)
       {
-        complement.append(Dna.getComplement(alleles[i]));
+        complement.append(Dna.getComplement(allele.charAt(i)));
       }
     }
   }
@@ -711,8 +764,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
 
     // long start = System.currentTimeMillis();
     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
-    MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
-            targetSequence.getStart());
+    MapList mapping = getGenomicRangesFromFeatures(sourceSequence,
+            accessionId, targetSequence.getStart());
     if (mapping == null)
     {
       return false;
@@ -843,11 +896,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
           String type, String parentId)
   {
     List<SequenceFeature> result = new ArrayList<SequenceFeature>();
-    
+
     SequenceFeature[] sfs = sequence.getSequenceFeatures();
-    if (sfs != null) {
+    if (sfs != null)
+    {
       SequenceOntologyI so = SequenceOntologyFactory.getInstance();
-      for (SequenceFeature sf :sfs) {
+      for (SequenceFeature sf : sfs)
+      {
         if (so.isA(sf.getType(), type))
         {
           String parent = (String) sf.getValue(PARENT);