Merge branch 'develop' into merge/develop_bug/JAL-2154projectMappings

author Jim Procter <jprocter@issues.jalview.org>

Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)

committer Jim Procter <jprocter@issues.jalview.org>

Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)
author Jim Procter <jprocter@issues.jalview.org>
Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)
diff --combined src/jalview/analysis/AlignmentUtils.java

index b0a2269,d1cd5a3..ea330d8
--- 1/src/jalview/analysis/AlignmentUtils.java
--- 2/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@@ -22,6 -22,7 +22,6 @@@ package jalview.analysis
   
   import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
   
- -import jalview.api.DBRefEntryI;
   import jalview.datamodel.AlignedCodon;
   import jalview.datamodel.AlignedCodonFrame;
   import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
@@@ -72,6 -73,7 +72,7 @@@ import java.util.TreeMap
   public class AlignmentUtils
   {
   
+   private static final int CODON_LENGTH = 3;
     private static final String SEQUENCE_VARIANT = "sequence_variant:";
     private static final String ID = "ID";
   
@@@ -79,15 -81,16 +80,16 @@@
      * A data model to hold the 'normal' base value at a position, and an optional
      * sequence variant feature
      */
-   static class DnaVariant
+   static final class DnaVariant
     {
-     String base;
+     final String base;
   
       SequenceFeature variant;
   
       DnaVariant(String nuc)
       {
         base = nuc;
+       variant = null;
       }
   
       DnaVariant(String nuc, SequenceFeature var)
@@@ -95,6 -98,11 +97,11 @@@
         base = nuc;
         variant = var;
       }
+ 
+     public String getSource()
+     {
+       return variant == null ? null : variant.getFeatureGroup();
+     }
     }
   
     /**
@@@ -427,7 -435,7 +434,7 @@@
       /*
        * cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)
        */
-     final int mappedLength = 3 * aaSeqChars.length;
+     final int mappedLength = CODON_LENGTH * aaSeqChars.length;
       int cdnaLength = cdnaSeqChars.length;
       int cdnaStart = cdnaSeq.getStart();
       int cdnaEnd = cdnaSeq.getEnd();
@@@ -439,14 -447,14 +446,14 @@@
        */
       if (cdnaLength != mappedLength && cdnaLength > 2)
       {
-       String lastCodon = String.valueOf(cdnaSeqChars, cdnaLength - 3, 3)
+       String lastCodon = String.valueOf(cdnaSeqChars, cdnaLength - CODON_LENGTH, CODON_LENGTH)
                 .toUpperCase();
         for (String stop : ResidueProperties.STOP)
         {
           if (lastCodon.equals(stop))
           {
-           cdnaEnd -= 3;
-           cdnaLength -= 3;
+           cdnaEnd -= CODON_LENGTH;
+           cdnaLength -= CODON_LENGTH;
             break;
           }
         }
@@@ -458,12 -466,12 +465,12 @@@
       int startOffset = 0;
       if (cdnaLength != mappedLength
               && cdnaLength > 2
-             && String.valueOf(cdnaSeqChars, 0, 3).toUpperCase()
+             && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()
                       .equals(ResidueProperties.START))
       {
-       startOffset += 3;
-       cdnaStart += 3;
-       cdnaLength -= 3;
+       startOffset += CODON_LENGTH;
+       cdnaStart += CODON_LENGTH;
+       cdnaLength -= CODON_LENGTH;
       }
   
       if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))
@@@ -472,7 -480,7 +479,7 @@@
          * protein is translation of dna (+/- start/stop codons)
          */
         MapList map = new MapList(new int[] { cdnaStart, cdnaEnd }, new int[]
-       { proteinStart, proteinEnd }, 3, 1);
+       { proteinStart, proteinEnd }, CODON_LENGTH, 1);
         return map;
       }
   
@@@ -503,9 -511,9 +510,9 @@@
       int aaPos = 0;
       int dnaPos = cdnaStart;
       for (; dnaPos < cdnaSeqChars.length - 2
-             && aaPos < aaSeqChars.length; dnaPos += 3, aaPos++)
+             && aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)
       {
-       String codon = String.valueOf(cdnaSeqChars, dnaPos, 3);
+       String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
         final String translated = ResidueProperties.codonTranslate(codon);
   
         /*
@@@ -541,9 -549,9 +548,9 @@@
       {
         return true;
       }
-     if (dnaPos == cdnaSeqChars.length - 3)
+     if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)
       {
-       String codon = String.valueOf(cdnaSeqChars, dnaPos, 3);
+       String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
         if ("STOP".equals(ResidueProperties.codonTranslate(codon)))
         {
           return true;
@@@ -894,7 -902,8 +901,8 @@@
         }
         width = Math.max(dnaSeq.getLength(), width);
       }
-     int oldwidth, diff;
+     int oldwidth;
+     int diff;
       for (SequenceI dnaSeq : dna.getSequences())
       {
         oldwidth = dnaSeq.getLength();
@@@ -934,9 -943,9 +942,9 @@@
       for (AlignedCodonFrame mapping : dnaMappings)
       {
         SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
-       int peptideLength = peptide.getLength();
         if (peptide != null)
         {
+         int peptideLength = peptide.getLength();
           Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
           if (map != null)
           {
@@@ -950,7 -959,7 +958,7 @@@
                     .getFromRanges());
             int mappedToLength = MappingUtils
                     .getLength(mapList.getToRanges());
-           boolean addStopCodon = (cdsLength == mappedFromLength * 3 + 3)
+           boolean addStopCodon = (cdsLength == mappedFromLength * CODON_LENGTH + CODON_LENGTH)
                     || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
             if (cdsLength != mappedToLength && !addStopCodon)
             {
@@@ -964,8 -973,8 +972,8 @@@
             /*
              * pre-fill the aligned cds sequence with gaps
              */
-           char[] alignedCds = new char[peptideLength * 3
-                   + (addStopCodon ? 3 : 0)];
+           char[] alignedCds = new char[peptideLength * CODON_LENGTH
+                   + (addStopCodon ? CODON_LENGTH : 0)];
             Arrays.fill(alignedCds, gapChar);
   
             /*
@@@ -982,7 -991,7 +990,7 @@@
             {
               if (Comparison.isGap(residue))
               {
-               cdsCol += 3;
+               cdsCol += CODON_LENGTH;
               }
               else
               {
@@@ -991,7 -1000,7 +999,7 @@@
                 if (codon == null)
                 {
                   // e.g. incomplete start codon, X in peptide
-                 cdsCol += 3;
+                 cdsCol += CODON_LENGTH;
                 }
                 else
                 {
@@@ -1009,7 -1018,7 +1017,7 @@@
              * append stop codon if not mapped from protein,
              * closing it up to the end of the mapped sequence
              */
-           if (copiedBases == nucleotides.length - 3)
+           if (copiedBases == nucleotides.length - CODON_LENGTH)
             {
               for (int i = alignedCds.length - 1; i >= 0; i--)
               {
@@@ -1019,7 -1028,7 +1027,7 @@@
                   break;
                 }
               }
-             for (int i = nucleotides.length - 3; i < nucleotides.length; i++)
+             for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++)
               {
                 alignedCds[cdsCol++] = nucleotides[i];
               }
@@@ -1680,20 -1689,11 +1688,20 @@@
              * didn't find mapped CDS sequence - construct it and add
              * its dataset sequence to the dataset
              */
- -          cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
- -          SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
+ +          cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,
+ +                  dataset).deriveSequence();
+ +          // cdsSeq has a name constructed as CDS|<dbref>
+ +          // <dbref> will be either the accession for the coding sequence,
+ +          // marked in the /via/ dbref to the protein product accession
+ +          // or it will be the original nucleotide accession.
+ +          SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();
+ +
             cdsSeqs.add(cdsSeq);
+ +
             if (!dataset.getSequences().contains(cdsSeqDss))
             {
+ +            // check if this sequence is a newly created one
+ +            // so needs adding to the dataset
               dataset.addSequence(cdsSeqDss);
             }
   
@@@ -1705,8 -1705,7 +1713,8 @@@
             MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(),
                     mapList.getFromRatio(), mapList.getToRatio());
             AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
- -          cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap);
+ +          cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+ +                  cdsToProteinMap);
   
             /*
              * guard against duplicating the mapping if repeating this action
@@@ -1716,8 -1715,23 +1724,8 @@@
               mappings.add(cdsToProteinMapping);
             }
   
- -          /*
- -           * copy protein's dbrefs to CDS sequence
- -           * this enables Get Cross-References from CDS alignment
- -           */
- -          DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false,
- -                  proteinProduct.getDBRefs());
- -          if (proteinRefs != null)
- -          {
- -            for (DBRefEntry ref : proteinRefs)
- -            {
- -              DBRefEntry cdsToProteinRef = new DBRefEntry(ref);
- -              cdsToProteinRef.setMap(new Mapping(proteinProduct,
- -                      cdsToProteinMap));
- -              cdsSeqDss.addDBRef(cdsToProteinRef);
- -            }
- -          }
- -
+ +          propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
+ +                  proteinProduct, aMapping);
             /*
              * add another mapping from original 'from' range to CDS
              */
@@@ -1725,7 -1739,7 +1733,7 @@@
             MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
                     cdsRange, 1,
                     1);
- -          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq,
+ +          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
                     dnaToCdsMap);
             if (!mappings.contains(dnaToCdsMapping))
             {
@@@ -1739,37 -1753,12 +1747,37 @@@
              * same source and accession, so need a different accession for
              * the CDS from the dna sequence
              */
- -          DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
- -          if (dnaRef != null)
+ +          
+ +          // specific use case:
+ +          // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+ +          // ENSG02, ENSG03, with transcripts and products similarly named.
+ +          // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+ +          
+ +          // JBPNote: ?? can't actually create an example that demonstrates we
+ +          // need to
+ +          // synthesize an xref.
+ +          
+ +          for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
             {
+ +            // creates a complementary cross-reference to the source sequence's
+ +            // primary reference.
+ +
+ +            DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(),
+ +                    primRef.getSource() + ":" + primRef.getVersion(),
+ +                    primRef.getAccessionId());
+ +            cdsCrossRef
+ +                    .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap)));
+ +            cdsSeqDss.addDBRef(cdsCrossRef);
+ +
+ +            // problem here is that the cross-reference is synthesized -
+ +            // cdsSeq.getName() may be like 'CDS|dnaaccession' or
+ +            // 'CDS|emblcdsacc'
               // assuming cds version same as dna ?!?
- -            DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
- -                    dnaRef.getVersion(), cdsSeq.getName());
+ +
+ +            DBRefEntry proteinToCdsRef = new DBRefEntry(
+ +                    primRef.getSource(), primRef.getVersion(),
+ +                    cdsSeq.getName());
+ +            //
               proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
                       .getInverse()));
               proteinProduct.addDBRef(proteinToCdsRef);
@@@ -1825,7 -1814,7 +1833,7 @@@
       int mappedFromLength = MappingUtils.getLength(aMapping.getMap()
               .getFromRanges());
       int dnaLength = seqDss.getLength();
-     if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - 3)
+     if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - CODON_LENGTH)
       {
         return seqDss;
       }
@@@ -1841,7 -1830,7 +1849,7 @@@
         for (SequenceToSequenceMapping map : acf.getMappings())
         {
           Mapping mapping = map.getMapping();
-         if (mapping != aMapping && mapping.getMap().getFromRatio() == 3
+         if (mapping != aMapping && mapping.getMap().getFromRatio() == CODON_LENGTH
                   && proteinProduct == mapping.getTo()
                   && seqDss != map.getFromSeq())
           {
@@@ -1875,14 -1864,9 +1883,14 @@@
      * 
      * @param seq
      * @param mapping
+ +   * @param dataset
+ +   *          - existing dataset. We check for sequences that look like the CDS
+ +   *          we are about to construct, if one exists already, then we will
+ +   *          just return that one.
      * @return CDS sequence (as a dataset sequence)
      */
- -  static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
+ +  static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,
+ +          AlignmentI dataset)
     {
       char[] seqChars = seq.getSequence();
       List<int[]> fromRanges = mapping.getMap().getFromRanges();
@@@ -1909,7 -1893,7 +1917,7 @@@
           }
         }
       }
- -
+ +    
       /*
        * assign 'from id' held in the mapping if set (e.g. EMBL protein_id),
        * else generate a sequence name
@@@ -1917,124 -1901,12 +1925,124 @@@
       String mapFromId = mapping.getMappedFromId();
       String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName());
       SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos);
+ +    if (dataset != null)
+ +    {
+ +      SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());
+ +      if (matches != null)
+ +      {
+ +        boolean matched = false;
+ +        for (SequenceI mtch : matches)
+ +        {
+ +          if (mtch.getStart() != newSeq.getStart())
+ +          {
+ +            continue;
+ +          }
+ +          if (mtch.getEnd() != newSeq.getEnd())
+ +          {
+ +            continue;
+ +          }
+ +          if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))
+ +          {
+ +            continue;
+ +          }
+ +          if (!matched)
+ +          {
+ +            matched = true;
+ +            newSeq = mtch;
+ +          }
+ +          else
+ +          {
+ +            System.err
+ +                    .println("JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+ +                            + mtch.toString());
+ +          }
+ +        }
+ +      }
+ +    }
       // newSeq.setDescription(mapFromId);
   
       return newSeq;
     }
   
     /**
+ +   * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
+ +   * the given mapping.
+ +   * 
+ +   * @param cdsSeq
+ +   * @param contig
+ +   * @param mapping
+ +   * @return list of DBRefEntrys added.
+ +   */
+ +  public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+ +          SequenceI contig, SequenceI proteinProduct, Mapping mapping)
+ +  {
+ +
+ +    // gather direct refs from contig congrent with mapping
+ +    List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
+ +    HashSet<String> directSources = new HashSet<String>();
+ +    if (contig.getDBRefs() != null)
+ +    {
+ +      for (DBRefEntry dbr : contig.getDBRefs())
+ +      {
+ +        if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+ +        {
+ +          MapList map = dbr.getMap().getMap();
+ +          // check if map is the CDS mapping
+ +          if (mapping.getMap().equals(map))
+ +          {
+ +            direct.add(dbr);
+ +            directSources.add(dbr.getSource());
+ +          }
+ +        }
+ +      }
+ +    }
+ +    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+ +            proteinProduct.getDBRefs(),
+ +            directSources.toArray(new String[0]));
+ +    List<DBRefEntry> propagated = new ArrayList<DBRefEntry>();
+ +
+ +    // and generate appropriate mappings
+ +    for (DBRefEntry cdsref : direct)
+ +    {
+ +      // clone maplist and mapping
+ +      MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[]
+ +      { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap()
+ +              .getToRanges(), 3, 1);
+ +      Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap()
+ +              .getMap());
+ +
+ +      // create dbref
+ +      DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
+ +              cdsref.getVersion(), cdsref.getAccessionId(), new Mapping(
+ +                      cdsmap.getTo(), cdsposmap));
+ +
+ +      // and see if we can map to the protein product for this mapping.
+ +      // onSource is the filtered set of accessions on protein that we are
+ +      // tranferring, so we assume accession is the same.
+ +      if (cdsmap.getTo() == null && onSource != null)
+ +      {
+ +        List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,
+ +                cdsref.getAccessionId());
+ +        if (sourceRefs != null)
+ +        {
+ +          for (DBRefEntry srcref : sourceRefs)
+ +          {
+ +            if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))
+ +            {
+ +              // we have found a complementary dbref on the protein product, so
+ +              // update mapping's getTo
+ +              newref.getMap().setTo(proteinProduct);
+ +            }
+ +          }
+ +        }
+ +      }
+ +      cdsSeq.addDBRef(newref);
+ +      propagated.add(newref);
+ +    }
+ +    return propagated;
+ +  }
+ +
+ +  /**
      * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
      * feature start/end ranges, optionally omitting specified feature types.
      * Returns the number of features copied.
@@@ -2163,7 -2035,7 +2171,7 @@@
       /*
        * dna length should map to protein (or protein plus stop codon)
        */
-     int codesForResidues = mappedDnaLength / 3;
+     int codesForResidues = mappedDnaLength / CODON_LENGTH;
       if (codesForResidues == (proteinLength + 1))
       {
         // assuming extra codon is for STOP and not in peptide
@@@ -2172,7 -2044,7 +2180,7 @@@
       if (codesForResidues == proteinLength)
       {
         proteinRange.add(new int[] { proteinStart, proteinEnd });
-       return new MapList(ranges, proteinRange, 3, 1);
+       return new MapList(ranges, proteinRange, CODON_LENGTH, 1);
       }
       return null;
     }
@@@ -2448,7 -2320,7 +2456,7 @@@
        * are currently ignored here
        */
       String trans = codon.contains("-") ? "-"
-             : (codon.length() > 3 ? null : ResidueProperties
+             : (codon.length() > CODON_LENGTH ? null : ResidueProperties
                       .codonTranslate(codon));
       if (trans != null && !trans.equals(residue))
       {
@@@ -2460,7 -2332,7 +2468,7 @@@
         // set score to 0f so 'graduated colour' option is offered! JAL-2060
         SequenceFeature sf = new SequenceFeature(
                 SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
-               peptidePos, 0f, "Jalview");
+               peptidePos, 0f, var.getSource());
         StringBuilder attributes = new StringBuilder(32);
         String id = (String) var.variant.getValue(ID);
         if (id != null)
@@@ -2471,7 -2343,7 +2479,7 @@@
           }
           sf.setValue(ID, id);
           attributes.append(ID).append("=").append(id);
-         // TODO handle other species variants
+         // TODO handle other species variants JAL-2064
           StringBuilder link = new StringBuilder(32);
           try
           {
@@@ -2510,6 -2382,7 +2518,7 @@@
      * @param dnaToProtein
      * @return
      */
+   @SuppressWarnings("unchecked")
     static LinkedHashMap<Integer, List<DnaVariant>[]> buildDnaVariantsMap(
             SequenceI dnaSeq, MapList dnaToProtein)
     {
@@@ -2553,7 -2426,7 +2562,7 @@@
           List<DnaVariant>[] codonVariants = variants.get(peptidePosition);
           if (codonVariants == null)
           {
-           codonVariants = new ArrayList[3];
+           codonVariants = new ArrayList[CODON_LENGTH];
             codonVariants[0] = new ArrayList<DnaVariant>();
             codonVariants[1] = new ArrayList<DnaVariant>();
             codonVariants[2] = new ArrayList<DnaVariant>();
@@@ -2587,7 -2460,7 +2596,7 @@@
           /*
            * save nucleotide (and any variant) for each codon position
            */
-         for (int codonPos = 0; codonPos < 3; codonPos++)
+         for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
           {
             String nucleotide = String.valueOf(
                     dnaSeq.getCharAt(codon[codonPos] - dnaStart))
@@@ -2640,7 -2513,7 +2649,7 @@@
     {
       AlignmentI copy = new Alignment(new Alignment(seqs));
       copy.setDataset(dataset);
- -
+ +    boolean isProtein = !copy.isNucleotide();
       SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
       if (xrefs != null)
       {
@@@ -2651,8 -2524,7 +2660,8 @@@
           {
             for (DBRefEntry dbref : dbrefs)
             {
- -            if (dbref.getMap() == null || dbref.getMap().getTo() == null)
+ +            if (dbref.getMap() == null || dbref.getMap().getTo() == null
+ +                    || dbref.getMap().getTo().isProtein() != isProtein)
               {
                 continue;
               }
diff --combined src/jalview/ext/ensembl/EnsemblSeqProxy.java

index 5a32736,cc002e1..5fccedd
--- 1/src/jalview/ext/ensembl/EnsemblSeqProxy.java
--- 2/src/jalview/ext/ensembl/EnsemblSeqProxy.java
+++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java
@@@ -276,7 -276,8 +276,7 @@@ public abstract class EnsemblSeqProxy e
         {
           // clunky: ensure Uniprot xref if we have one is on mapped sequence
           SequenceI ds = proteinSeq.getDatasetSequence();
- -        ds.setSourceDBRef(proteinSeq.getSourceDBRef());
- -
+ +        // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
           Mapping map = new Mapping(ds, mapList);
           DBRefEntry dbr = new DBRefEntry(getDbSource(),
                   getEnsemblDataVersion(), proteinSeq.getName(), map);
@@@ -308,8 -309,7 +308,8 @@@
         seq = seq.getDatasetSequence();
       }
   
- -    EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
+ +    EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(),
+ +            getEnsemblDataVersion());
       List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName());
       for (DBRefEntry xref : xrefs)
       {
@@@ -322,6 -322,7 +322,6 @@@
       DBRefEntry self = new DBRefEntry(getDbSource(),
               getEnsemblDataVersion(), seq.getName());
       seq.addDBRef(self);
- -    seq.setSourceDBRef(self);
     }
   
     /**
@@@ -381,7 -382,7 +381,7 @@@
           {
             DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
                     getEnsemblDataVersion(), name);
- -          sq.setSourceDBRef(dbref);
+ +          sq.addDBRef(dbref);
           }
         }
         if (alignment == null)
@@@ -612,6 -613,10 +612,10 @@@
         SequenceFeature copy = new SequenceFeature(sf);
         copy.setBegin(Math.min(mappedRange[0], mappedRange[1]));
         copy.setEnd(Math.max(mappedRange[0], mappedRange[1]));
+       if (".".equals(copy.getFeatureGroup()))
+       {
+         copy.setFeatureGroup(getDbSource());
+       }
         targetSequence.addSequenceFeature(copy);
   
         /*
diff --combined test/jalview/analysis/AlignmentUtilsTests.java

index 7e8442d,a856231..ddd38e7
--- 1/test/jalview/analysis/AlignmentUtilsTests.java
--- 2/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@@ -994,44 -994,29 +994,44 @@@ public class AlignmentUtilsTest
   
       /*
        * need a sourceDbRef if we are to construct dbrefs to the CDS
- -     * sequence
+ +     * sequence from the dna contig sequences
        */
       DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
- -    dna1.getDatasetSequence().setSourceDBRef(dbref);
+ +    dna1.getDatasetSequence().addDBRef(dbref);
+ +    org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
       dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
- -    dna2.getDatasetSequence().setSourceDBRef(dbref);
+ +    dna2.getDatasetSequence().addDBRef(dbref);
+ +    org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
   
       /*
        * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
        * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
        */
- -    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ +    MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
               new int[] { 1, 2 }, 3, 1);
       AlignedCodonFrame acf = new AlignedCodonFrame();
- -    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ +    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+ +            mapfordna1);
       dna.addCodonFrame(acf);
- -    map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ +    MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+ +            new int[] { 1, 3 },
               3, 1);
       acf = new AlignedCodonFrame();
- -    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ +    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
+ +            mapfordna2);
       dna.addCodonFrame(acf);
   
       /*
+ +     * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+ +     */
+ +    DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
+ +            new Mapping(mapfordna1));
+ +    dna1.getDatasetSequence().addDBRef(dna1xref);
+ +    DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
+ +            new Mapping(mapfordna2));
+ +    dna2.getDatasetSequence().addDBRef(dna2xref);
+ +
+ +    /*
        * execute method under test:
        */
       AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
@@@ -1057,12 -1042,11 +1057,12 @@@
        * verify CDS has a dbref with mapping to peptide
        */
       assertNotNull(cds1Dss.getDBRefs());
- -    assertEquals(1, cds1Dss.getDBRefs().length);
+ +    assertEquals(2, cds1Dss.getDBRefs().length);
       dbref = cds1Dss.getDBRefs()[0];
- -    assertEquals("UNIPROT", dbref.getSource());
- -    assertEquals("0", dbref.getVersion());
- -    assertEquals("pep1", dbref.getAccessionId());
+ +    assertEquals(dna1xref.getSource(), dbref.getSource());
+ +    // version is via ensembl's primary ref
+ +    assertEquals(dna1xref.getVersion(), dbref.getVersion());
+ +    assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
       assertNotNull(dbref.getMap());
       assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
       MapList cdsMapping = new MapList(new int[] { 1, 6 },
@@@ -1073,7 -1057,6 +1073,7 @@@
        * verify peptide has added a dbref with reverse mapping to CDS
        */
       assertNotNull(pep1.getDBRefs());
+ +    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
       assertEquals(2, pep1.getDBRefs().length);
       dbref = pep1.getDBRefs()[1];
       assertEquals("ENSEMBL", dbref.getSource());
@@@ -1954,13 -1937,15 +1954,15 @@@
     public void testComputePeptideVariants()
     {
       /*
-      * scenario: AAATTTCCC codes for KFP, with variants
-      *           GAA -> E
-      *           CAA -> Q
-      *           AAG synonymous
-      *           AAT -> N
-      *              TTC synonymous
-      *                 CAC,CGC -> H,R (as one variant)
+      * scenario: AAATTTCCC codes for KFP
+      * variants:
+      *           GAA -> E             source: Ensembl
+      *           CAA -> Q             source: dbSNP
+      *           AAG synonymous       source: COSMIC
+      *           AAT -> N             source: Ensembl
+      *           ...TTC synonymous    source: dbSNP
+      *           ......CAC,CGC -> H,R source: COSMIC
+      *                 (one variant with two alleles)
        */
       SequenceI peptide = new Sequence("pep/10-12", "KFP");
   
@@@ -1968,32 -1953,35 +1970,35 @@@
        * two distinct variants for codon 1 position 1
        * second one has clinical significance
        */
+     String ensembl = "Ensembl";
+     String dbSnp = "dbSNP";
+     String cosmic = "COSMIC";
       SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
-             0f, null);
+             0f, ensembl);
       sf1.setValue("alleles", "A,G"); // GAA -> E
       sf1.setValue("ID", "var1.125A>G");
       SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
-             0f, null);
+             0f, dbSnp);
       sf2.setValue("alleles", "A,C"); // CAA -> Q
       sf2.setValue("ID", "var2");
       sf2.setValue("clinical_significance", "Dodgy");
       SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3,
-             0f, null);
+             0f, cosmic);
       sf3.setValue("alleles", "A,G"); // synonymous
       sf3.setValue("ID", "var3");
       sf3.setValue("clinical_significance", "None");
       SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
-             0f, null);
+             0f, ensembl);
       sf4.setValue("alleles", "A,T"); // AAT -> N
       sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off
       sf4.setValue("clinical_significance", "Benign");
       SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6,
-             0f, null);
+             0f, dbSnp);
       sf5.setValue("alleles", "T,C"); // synonymous
       sf5.setValue("ID", "var5");
       sf5.setValue("clinical_significance", "Bad");
       SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8,
-             0f, null);
+             0f, cosmic);
       sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R
       sf6.setValue("ID", "var6");
       sf6.setValue("clinical_significance", "Good");
@@@ -2041,14 -2029,15 +2046,15 @@@
   
       /*
        * verify added sequence features for
-      * var1 K -> E
-      * var2 K -> Q
-      * var4 K -> N
-      * var6 P -> H
-      * var6 P -> R
+      * var1 K -> E Ensembl
+      * var2 K -> Q dbSNP
+      * var4 K -> N Ensembl
+      * var6 P -> H COSMIC
+      * var6 P -> R COSMIC
        */
       SequenceFeature[] sfs = peptide.getSequenceFeatures();
       assertEquals(5, sfs.length);
+ 
       SequenceFeature sf = sfs[0];
       assertEquals(1, sf.getBegin());
       assertEquals(1, sf.getEnd());
@@@ -2061,7 -2050,8 +2067,8 @@@
       assertEquals(
               "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
               sf.links.get(0));
-     assertEquals("Jalview", sf.getFeatureGroup());
+     assertEquals(ensembl, sf.getFeatureGroup());
+ 
       sf = sfs[1];
       assertEquals(1, sf.getBegin());
       assertEquals(1, sf.getEnd());
@@@ -2073,7 -2063,8 +2080,8 @@@
       assertEquals(
               "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
               sf.links.get(0));
-     assertEquals("Jalview", sf.getFeatureGroup());
+     assertEquals(dbSnp, sf.getFeatureGroup());
+ 
       sf = sfs[2];
       assertEquals(1, sf.getBegin());
       assertEquals(1, sf.getEnd());
@@@ -2085,7 -2076,9 +2093,9 @@@
       assertEquals(
               "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
               sf.links.get(0));
-     assertEquals("Jalview", sf.getFeatureGroup());
+     assertEquals(ensembl, sf.getFeatureGroup());
+ 
+     // var5 generates two distinct protein variant features
       sf = sfs[3];
       assertEquals(3, sf.getBegin());
       assertEquals(3, sf.getEnd());
@@@ -2097,8 -2090,8 +2107,8 @@@
       assertEquals(
               "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
               sf.links.get(0));
-     // var5 generates two distinct protein variant features
-     assertEquals("Jalview", sf.getFeatureGroup());
+     assertEquals(cosmic, sf.getFeatureGroup());
+ 
       sf = sfs[4];
       assertEquals(3, sf.getBegin());
       assertEquals(3, sf.getEnd());
@@@ -2110,7 -2103,7 +2120,7 @@@
       assertEquals(
               "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
               sf.links.get(0));
-     assertEquals("Jalview", sf.getFeatureGroup());
+     assertEquals(cosmic, sf.getFeatureGroup());
     }
   
     /**
author	Jim Procter <jprocter@issues.jalview.org>
	Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)
committer	Jim Procter <jprocter@issues.jalview.org>
	Tue, 30 Aug 2016 13:30:34 +0000 (14:30 +0100)
		1	2
src/jalview/analysis/AlignmentUtils.java	patch \|	diff1 \|	diff2 \|	blob \| history
src/jalview/ext/ensembl/EnsemblSeqProxy.java	patch \|	diff1 \|	diff2 \|	blob \| history
test/jalview/analysis/AlignmentUtilsTests.java	patch \|	diff1 \|	diff2 \|	blob \| history