Merge branch 'develop' into merge/develop_bug/JAL-2154projectMappings merge/develop_bug/JAL-2154projectMappings
authorJim Procter <jprocter@issues.jalview.org>
Tue, 30 Aug 2016 17:06:00 +0000 (18:06 +0100)
committerJim Procter <jprocter@issues.jalview.org>
Tue, 30 Aug 2016 17:06:00 +0000 (18:06 +0100)
39 files changed:
src/jalview/analysis/AlignmentUtils.java
src/jalview/analysis/CrossRef.java
src/jalview/api/DBRefEntryI.java
src/jalview/datamodel/Alignment.java
src/jalview/datamodel/AlignmentI.java
src/jalview/datamodel/DBRefEntry.java
src/jalview/datamodel/DBRefSource.java
src/jalview/datamodel/Sequence.java
src/jalview/datamodel/SequenceI.java
src/jalview/datamodel/xdb/embl/EmblEntry.java
src/jalview/ext/ensembl/EnsemblGene.java
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/ext/ensembl/EnsemblSymbol.java
src/jalview/ext/ensembl/EnsemblXref.java
src/jalview/gui/AlignFrame.java
src/jalview/gui/CrossRefAction.java [new file with mode: 0644]
src/jalview/gui/Jalview2XML.java
src/jalview/gui/SequenceFetcher.java
src/jalview/gui/StructureChooser.java
src/jalview/io/StructureFile.java
src/jalview/structure/StructureSelectionManager.java
src/jalview/util/DBRefUtils.java
src/jalview/util/LinkedIdentityHashSet.java [new file with mode: 0644]
src/jalview/util/MapList.java
src/jalview/ws/DBRefFetcher.java
src/jalview/ws/dbsources/Uniprot.java
src/jalview/ws/sifts/SiftsClient.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/analysis/CrossRefTest.java
test/jalview/datamodel/AlignmentTest.java
test/jalview/datamodel/DBRefEntryTest.java
test/jalview/datamodel/SequenceTest.java
test/jalview/datamodel/xdb/embl/EmblEntryTest.java
test/jalview/ext/ensembl/EnsemblXrefTest.java
test/jalview/io/CrossRef2xmlTests.java [new file with mode: 0644]
test/jalview/io/Jalview2xmlBase.java [new file with mode: 0644]
test/jalview/io/Jalview2xmlTests.java
test/jalview/io/testProps_nodas.jvprops [new file with mode: 0644]
test/jalview/ws/seqfetcher/DbRefFetcherTest.java

index d1cd5a3..ea330d8 100644 (file)
@@ -22,7 +22,6 @@ package jalview.analysis;
 
 import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
 
-import jalview.api.DBRefEntryI;
 import jalview.datamodel.AlignedCodon;
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
@@ -1689,11 +1688,20 @@ public class AlignmentUtils
            * didn't find mapped CDS sequence - construct it and add
            * its dataset sequence to the dataset
            */
-          cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
-          SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
+          cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,
+                  dataset).deriveSequence();
+          // cdsSeq has a name constructed as CDS|<dbref>
+          // <dbref> will be either the accession for the coding sequence,
+          // marked in the /via/ dbref to the protein product accession
+          // or it will be the original nucleotide accession.
+          SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();
+
           cdsSeqs.add(cdsSeq);
+
           if (!dataset.getSequences().contains(cdsSeqDss))
           {
+            // check if this sequence is a newly created one
+            // so needs adding to the dataset
             dataset.addSequence(cdsSeqDss);
           }
 
@@ -1705,7 +1713,8 @@ public class AlignmentUtils
           MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(),
                   mapList.getFromRatio(), mapList.getToRatio());
           AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
-          cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap);
+          cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+                  cdsToProteinMap);
 
           /*
            * guard against duplicating the mapping if repeating this action
@@ -1715,23 +1724,8 @@ public class AlignmentUtils
             mappings.add(cdsToProteinMapping);
           }
 
-          /*
-           * copy protein's dbrefs to CDS sequence
-           * this enables Get Cross-References from CDS alignment
-           */
-          DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false,
-                  proteinProduct.getDBRefs());
-          if (proteinRefs != null)
-          {
-            for (DBRefEntry ref : proteinRefs)
-            {
-              DBRefEntry cdsToProteinRef = new DBRefEntry(ref);
-              cdsToProteinRef.setMap(new Mapping(proteinProduct,
-                      cdsToProteinMap));
-              cdsSeqDss.addDBRef(cdsToProteinRef);
-            }
-          }
-
+          propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
+                  proteinProduct, aMapping);
           /*
            * add another mapping from original 'from' range to CDS
            */
@@ -1739,7 +1733,7 @@ public class AlignmentUtils
           MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
                   cdsRange, 1,
                   1);
-          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq,
+          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
                   dnaToCdsMap);
           if (!mappings.contains(dnaToCdsMapping))
           {
@@ -1753,12 +1747,37 @@ public class AlignmentUtils
            * same source and accession, so need a different accession for
            * the CDS from the dna sequence
            */
-          DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
-          if (dnaRef != null)
+          
+          // specific use case:
+          // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+          // ENSG02, ENSG03, with transcripts and products similarly named.
+          // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+          
+          // JBPNote: ?? can't actually create an example that demonstrates we
+          // need to
+          // synthesize an xref.
+          
+          for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
           {
+            // creates a complementary cross-reference to the source sequence's
+            // primary reference.
+
+            DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(),
+                    primRef.getSource() + ":" + primRef.getVersion(),
+                    primRef.getAccessionId());
+            cdsCrossRef
+                    .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap)));
+            cdsSeqDss.addDBRef(cdsCrossRef);
+
+            // problem here is that the cross-reference is synthesized -
+            // cdsSeq.getName() may be like 'CDS|dnaaccession' or
+            // 'CDS|emblcdsacc'
             // assuming cds version same as dna ?!?
-            DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
-                    dnaRef.getVersion(), cdsSeq.getName());
+
+            DBRefEntry proteinToCdsRef = new DBRefEntry(
+                    primRef.getSource(), primRef.getVersion(),
+                    cdsSeq.getName());
+            //
             proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
                     .getInverse()));
             proteinProduct.addDBRef(proteinToCdsRef);
@@ -1864,9 +1883,14 @@ public class AlignmentUtils
    * 
    * @param seq
    * @param mapping
+   * @param dataset
+   *          - existing dataset. We check for sequences that look like the CDS
+   *          we are about to construct, if one exists already, then we will
+   *          just return that one.
    * @return CDS sequence (as a dataset sequence)
    */
-  static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
+  static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,
+          AlignmentI dataset)
   {
     char[] seqChars = seq.getSequence();
     List<int[]> fromRanges = mapping.getMap().getFromRanges();
@@ -1893,7 +1917,7 @@ public class AlignmentUtils
         }
       }
     }
-
+    
     /*
      * assign 'from id' held in the mapping if set (e.g. EMBL protein_id),
      * else generate a sequence name
@@ -1901,12 +1925,124 @@ public class AlignmentUtils
     String mapFromId = mapping.getMappedFromId();
     String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName());
     SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos);
+    if (dataset != null)
+    {
+      SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());
+      if (matches != null)
+      {
+        boolean matched = false;
+        for (SequenceI mtch : matches)
+        {
+          if (mtch.getStart() != newSeq.getStart())
+          {
+            continue;
+          }
+          if (mtch.getEnd() != newSeq.getEnd())
+          {
+            continue;
+          }
+          if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))
+          {
+            continue;
+          }
+          if (!matched)
+          {
+            matched = true;
+            newSeq = mtch;
+          }
+          else
+          {
+            System.err
+                    .println("JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+                            + mtch.toString());
+          }
+        }
+      }
+    }
     // newSeq.setDescription(mapFromId);
 
     return newSeq;
   }
 
   /**
+   * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
+   * the given mapping.
+   * 
+   * @param cdsSeq
+   * @param contig
+   * @param mapping
+   * @return list of DBRefEntrys added.
+   */
+  public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+          SequenceI contig, SequenceI proteinProduct, Mapping mapping)
+  {
+
+    // gather direct refs from contig congruent with mapping
+    List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
+    HashSet<String> directSources = new HashSet<String>();
+    if (contig.getDBRefs() != null)
+    {
+      for (DBRefEntry dbr : contig.getDBRefs())
+      {
+        if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+        {
+          MapList map = dbr.getMap().getMap();
+          // check if map is the CDS mapping
+          if (mapping.getMap().equals(map))
+          {
+            direct.add(dbr);
+            directSources.add(dbr.getSource());
+          }
+        }
+      }
+    }
+    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+            proteinProduct.getDBRefs(),
+            directSources.toArray(new String[0]));
+    List<DBRefEntry> propagated = new ArrayList<DBRefEntry>();
+
+    // and generate appropriate mappings
+    for (DBRefEntry cdsref : direct)
+    {
+      // clone maplist and mapping
+      MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[]
+      { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap()
+              .getToRanges(), 3, 1);
+      Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap()
+              .getMap());
+
+      // create dbref
+      DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
+              cdsref.getVersion(), cdsref.getAccessionId(), new Mapping(
+                      cdsmap.getTo(), cdsposmap));
+
+      // and see if we can map to the protein product for this mapping.
+      // onSource is the filtered set of accessions on protein that we are
+      // transferring, so we assume accession is the same.
+      if (cdsmap.getTo() == null && onSource != null)
+      {
+        List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,
+                cdsref.getAccessionId());
+        if (sourceRefs != null)
+        {
+          for (DBRefEntry srcref : sourceRefs)
+          {
+            if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))
+            {
+              // we have found a complementary dbref on the protein product, so
+              // update mapping's getTo
+              newref.getMap().setTo(proteinProduct);
+            }
+          }
+        }
+      }
+      cdsSeq.addDBRef(newref);
+      propagated.add(newref);
+    }
+    return propagated;
+  }
+
+  /**
    * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
    * feature start/end ranges, optionally omitting specified feature types.
    * Returns the number of features copied.
@@ -2513,7 +2649,7 @@ public class AlignmentUtils
   {
     AlignmentI copy = new Alignment(new Alignment(seqs));
     copy.setDataset(dataset);
-
+    boolean isProtein = !copy.isNucleotide();
     SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
     if (xrefs != null)
     {
@@ -2524,7 +2660,8 @@ public class AlignmentUtils
         {
           for (DBRefEntry dbref : dbrefs)
           {
-            if (dbref.getMap() == null || dbref.getMap().getTo() == null)
+            if (dbref.getMap() == null || dbref.getMap().getTo() == null
+                    || dbref.getMap().getTo().isProtein() != isProtein)
             {
               continue;
             }
index 288d60e..1295b46 100644 (file)
@@ -222,6 +222,9 @@ public class CrossRef
       boolean found = false;
       DBRefEntry[] xrfs = DBRefUtils
               .selectDbRefs(!fromDna, dss.getDBRefs());
+      // ENST & ENSP comes in to both Protein and nucleotide, so we need to
+      // filter them
+      // out later.
       if ((xrfs == null || xrfs.length == 0) && dataset != null)
       {
         /*
@@ -249,11 +252,15 @@ public class CrossRef
       List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
               source);
       Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+      // At this point, if we are retrieving Ensembl, we still don't filter out
+      // ENST when looking for protein crossrefs.
       while (refIterator.hasNext())
       {
         DBRefEntry xref = refIterator.next();
         found = false;
-        if (xref.hasMap())
+        // we're only interested in coding cross-references, not
+        // locus->transcript
+        if (xref.hasMap() && xref.getMap().getMap().isTripletMap())
         {
           SequenceI mappedTo = xref.getMap().getTo();
           if (mappedTo != null)
@@ -271,20 +278,45 @@ public class CrossRef
              * but findInDataset() matches ENSP when looking for Uniprot...
              */
             SequenceI matchInDataset = findInDataset(xref);
+            if (matchInDataset != null && xref.getMap().getTo() != null
+                    && matchInDataset != xref.getMap().getTo())
+            {
+              System.err
+                      .println("Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref."
+                              + "Found:"
+                              + matchInDataset
+                              + "\nExpected:"
+                              + xref.getMap().getTo()
+                              + "\nFor xref:"
+                              + xref);
+            }
             /*matcher.findIdMatch(mappedTo);*/
             if (matchInDataset != null)
             {
               if (!rseqs.contains(matchInDataset))
               {
                 rseqs.add(matchInDataset);
+                // need to try harder to only add unique mappings
+                if (xref.getMap().getMap().isTripletMap()
+                        && dataset.getMapping(seq, matchInDataset) == null
+                        && cf.getMappingBetween(seq, matchInDataset) == null)
+                {
+                  // materialise a mapping for highlighting between these sequences
+                  if (fromDna)
+                  {
+                    cf.addMap(dss, matchInDataset, xref.getMap().getMap(), xref.getMap().getMappedFromId());
+                  } else {
+                    cf.addMap(matchInDataset, dss, xref.getMap().getMap().getInverse(), xref.getMap().getMappedFromId());
+                  }
+                }
               }
               refIterator.remove();
               continue;
             }
+            // TODO: need to determine if this should be a deriveSequence
             SequenceI rsq = new Sequence(mappedTo);
             rseqs.add(rsq);
-            if (xref.getMap().getMap().getFromRatio() != xref.getMap()
-                    .getMap().getToRatio())
+            if (xref.getMap().getMap().isTripletMap())
             {
               // get sense of map correct for adding to product alignment.
               if (fromDna)
@@ -307,7 +339,9 @@ public class CrossRef
         {
           SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
                   + xref.getAccessionId());
-          if (matchedSeq != null)
+          // if there was a match, check it's at least the right type of
+          // molecule!
+          if (matchedSeq != null && matchedSeq.isProtein() == fromDna)
           {
             if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
             {
@@ -356,6 +390,37 @@ public class CrossRef
     SequenceI[] retrieved = null;
     SequenceI dss = seq.getDatasetSequence() == null ? seq : seq
             .getDatasetSequence();
+    // first filter in case we are retrieving crossrefs that have already been
+    // retrieved. this happens for cases where a database record doesn't yield
+    // protein products for CDS
+    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+    for (SequenceI sq : dataset.getSequences())
+    {
+      boolean dupeFound = false;
+      // !fromDna means we are looking only for nucleotide sequences, not
+      // protein
+      if (sq.isProtein() == fromDna)
+      {
+        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
+        {
+          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
+          {
+            sourceRefs.remove(found);
+            dupeFound = true;
+          }
+        }
+      }
+      if (dupeFound)
+      {
+        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+      }
+    }
+    if (sourceRefs.size() == 0)
+    {
+      // no more work to do! We already had all requested sequence records in
+      // the dataset.
+      return;
+    }
     try
     {
       retrieved = sftch.getSequences(sourceRefs, !fromDna);
@@ -413,7 +478,11 @@ public class CrossRef
                 }
                 else
                 {
-                  matcher.add(map.getTo());
+                  if (dataset.findIndex(map.getTo()) == -1)
+                  {
+                    dataset.addSequence(map.getTo());
+                    matcher.add(map.getTo());
+                  }
                 }
                 try
                 {
@@ -483,8 +552,11 @@ public class CrossRef
         }
         retrievedSequence.updatePDBIds();
         rseqs.add(retrievedDss);
-        dataset.addSequence(retrievedDss);
-        matcher.add(retrievedDss);
+        if (dataset.findIndex(retrievedDss) == -1)
+        {
+          dataset.addSequence(retrievedDss);
+          matcher.add(retrievedDss);
+        }
       }
     }
   }
@@ -662,24 +734,28 @@ public class CrossRef
           DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
   {
     MapList mapping = null;
-
+    SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom
+            : mapFrom.getDatasetSequence();
+    SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo
+            : mapTo.getDatasetSequence();
     /*
-     * look for a reverse mapping, if found make its inverse
+     * look for a reverse mapping, if found make its inverse. 
+     * Note - we do this on dataset sequences only.
      */
-    if (mapTo.getDBRefs() != null)
+    if (dsmapTo.getDBRefs() != null)
     {
-      for (DBRefEntry dbref : mapTo.getDBRefs())
+      for (DBRefEntry dbref : dsmapTo.getDBRefs())
       {
         String name = dbref.getSource() + "|" + dbref.getAccessionId();
-        if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+        if (dbref.hasMap() && dsmapFrom.getName().startsWith(name))
         {
           /*
            * looks like we've found a map from 'mapTo' to 'mapFrom'
            * - invert it to make the mapping the other way 
            */
           MapList reverse = dbref.getMap().getMap().getInverse();
-          xref.setMap(new Mapping(mapTo, reverse));
-          mappings.addMap(mapFrom, mapTo, reverse);
+          xref.setMap(new Mapping(dsmapTo, reverse));
+          mappings.addMap(mapFrom, dsmapTo, reverse);
           return true;
         }
       }
@@ -706,14 +782,16 @@ public class CrossRef
     /*
      * and add a reverse DbRef with the inverse mapping
      */
-    if (mapFrom.getDatasetSequence() != null
-            && mapFrom.getDatasetSequence().getSourceDBRef() != null)
-    {
-      DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
-              .getSourceDBRef());
-      dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
-              .getInverse()));
-      mapTo.addDBRef(dbref);
+    if (mapFrom.getDatasetSequence() != null && false)
+    // && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+    {
+      // possible need to search primary references... except, why doesn't xref
+      // == getSourceDBRef ??
+      // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+      // .getSourceDBRef());
+      // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+      // .getInverse()));
+      // mapTo.addDBRef(dbref);
     }
 
     if (fromDna)
@@ -789,8 +867,8 @@ public class CrossRef
    *          </ul>
    * @return true if relationship found and sequence added.
    */
-  boolean searchDataset(boolean fromDna, SequenceI fromSeq,
-          DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
+  boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
           boolean direct)
   {
     boolean found = false;
@@ -851,37 +929,38 @@ public class CrossRef
           // }
           if (!cands.isEmpty())
           {
-            if (!foundSeqs.contains(nxt))
+            if (foundSeqs.contains(nxt))
             {
-              found = true;
-              foundSeqs.add(nxt);
-              if (mappings != null && !direct)
+              continue;
+            }
+            found = true;
+            foundSeqs.add(nxt);
+            if (mappings != null && !direct)
+            {
+              /*
+               * if the matched sequence has mapped dbrefs to
+               * protein product / cdna, add equivalent mappings to
+               * our source sequence
+               */
+              for (DBRefEntry candidate : cands)
               {
-                /*
-                 * if the matched sequence has mapped dbrefs to
-                 * protein product / cdna, add equivalent mappings to
-                 * our source sequence
-                 */
-                for (DBRefEntry candidate : cands)
+                Mapping mapping = candidate.getMap();
+                if (mapping != null)
                 {
-                  Mapping mapping = candidate.getMap();
-                  if (mapping != null)
+                  MapList map = mapping.getMap();
+                  if (mapping.getTo() != null
+                          && map.getFromRatio() != map.getToRatio())
                   {
-                    MapList map = mapping.getMap();
-                    if (mapping.getTo() != null
-                            && map.getFromRatio() != map.getToRatio())
+                    /*
+                     * add a mapping, as from dna to peptide sequence
+                     */
+                    if (map.getFromRatio() == 3)
                     {
-                      /*
-                       * add a mapping, as from dna to peptide sequence
-                       */
-                      if (map.getFromRatio() == 3)
-                      {
-                        mappings.addMap(nxt, fromSeq, map);
-                      }
-                      else
-                      {
-                        mappings.addMap(nxt, fromSeq, map.getInverse());
-                      }
+                      mappings.addMap(nxt, fromSeq, map);
+                    }
+                    else
+                    {
+                      mappings.addMap(nxt, fromSeq, map.getInverse());
                     }
                   }
                 }
index 32245b3..701acb6 100644 (file)
@@ -70,4 +70,28 @@ public interface DBRefEntryI
    * @return
    */
   public boolean updateFrom(DBRefEntryI otherEntry);
+
+  /**
+   * Method to distinguish between direct and indirect database references
+   * 
+   * primary references indicate the local sequence data directly corresponds
+   * with the database record. All other references are secondary. direct
+   * references indicate that part or all of the local sequence data can be
+   * mapped with another sequence, enabling annotation transfer.
+   * cross-references indicate the local sequence data can be corresponded to
+   * some other linear coordinate system via a transformation.
+   * 
+   * This method is also sufficient to distinguish direct DBRefEntry mappings
+   * from other relationships - e.g. coding relationships (imply a 1:3/3:1
+   * mapping), but not transcript relationships, which imply a (possibly
+   * non-contiguous) 1:1 mapping
+   * 
+   * The only way a dbref's mappings can be fully verified is via the local
+   * sequence frame, so rather than use isPrimary directly, please use
+   * SequenceI.getPrimaryDBRefs()
+   *
+   * @return true if this reference provides a primary accession for the
+   *         associated sequence object
+   */
+  public boolean isPrimary();
 }
index 32bb761..2f64759 100755 (executable)
@@ -21,6 +21,7 @@
 package jalview.datamodel;
 
 import jalview.analysis.AlignmentUtils;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.io.FastaFile;
 import jalview.util.Comparison;
 import jalview.util.MessageManager;
@@ -225,18 +226,21 @@ public class Alignment implements AlignmentI
   {
     if (dataset != null)
     {
+
       // maintain dataset integrity
-      if (snew.getDatasetSequence() != null)
-      {
-        getDataset().addSequence(snew.getDatasetSequence());
-      }
-      else
+      SequenceI dsseq = snew.getDatasetSequence();
+      if (dsseq == null)
       {
         // derive new sequence
         SequenceI adding = snew.deriveSequence();
-        getDataset().addSequence(adding.getDatasetSequence());
         snew = adding;
+        dsseq = snew.getDatasetSequence();
       }
+      if (getDataset().findIndex(dsseq) == -1)
+      {
+        getDataset().addSequence(dsseq);
+      }
+
     }
     if (sequences == null)
     {
@@ -255,18 +259,22 @@ public class Alignment implements AlignmentI
     }
   }
 
-  /**
-   * Adds a sequence to the alignment. Recalculates maxLength and size.
-   * 
-   * @param snew
-   */
   @Override
-  public void setSequenceAt(int i, SequenceI snew)
+  public SequenceI replaceSequenceAt(int i, SequenceI snew)
   {
     synchronized (sequences)
     {
-      deleteSequence(i);
-      sequences.set(i, snew);
+      if (sequences.size() > i)
+      {
+        return sequences.set(i, snew);
+
+      }
+      else
+      {
+        sequences.add(snew);
+        hiddenSequences.adjustHeightSequenceAdded();
+      }
+      return null;
     }
   }
 
@@ -1029,6 +1037,62 @@ public class Alignment implements AlignmentI
   }
 
   /**
+   * add dataset sequences to seqs for currentSeq and any sequences it references
+   */
+  private void resolveAndAddDatasetSeq(SequenceI currentSeq,
+          Set<SequenceI> seqs, boolean createDatasetSequence)
+  {
+    if (currentSeq.getDatasetSequence() != null)
+    {
+      currentSeq = currentSeq.getDatasetSequence();
+    }
+    else
+    {
+      if (createDatasetSequence)
+      {
+        currentSeq = currentSeq.createDatasetSequence();
+      }
+    }
+    if (seqs.contains(currentSeq))
+    {
+      return;
+    }
+    List<SequenceI> toProcess = new ArrayList<SequenceI>();
+    toProcess.add(currentSeq);
+    while (toProcess.size() > 0)
+    {
+      // use a queue ?
+      SequenceI curDs = toProcess.remove(0);
+      if (seqs.contains(curDs))
+      {
+        continue;
+      }
+      seqs.add(curDs);
+      // iterate over database references, making sure we add forward referenced
+      // sequences
+      if (curDs.getDBRefs() != null)
+      {
+        for (DBRefEntry dbr : curDs.getDBRefs())
+        {
+          if (dbr.getMap() != null && dbr.getMap().getTo() != null)
+          {
+            if (dbr.getMap().getTo().getDatasetSequence() != null)
+            {
+              throw new Error("Implementation error: Map.getTo() for dbref"
+                      + dbr + " is not a dataset sequence.");
+              // TODO: if this happens, could also rewrite the reference to
+              // point to new dataset sequence
+            }
+            // we recurse to add all forward references to dataset sequences via
+            // DBRefs/etc
+            toProcess.add(dbr.getMap().getTo());
+          }
+        }
+      }
+    }
+  }
+
+  /**
    * Creates a new dataset for this alignment. Can only be done once - if
    * dataset is not null this will not be performed.
    */
@@ -1038,22 +1102,32 @@ public class Alignment implements AlignmentI
     {
       return;
     }
-    SequenceI[] seqs = new SequenceI[getHeight()];
-    SequenceI currentSeq;
+    // try to avoid using SequenceI.equals at this stage, it will be expensive
+    Set<SequenceI> seqs = new jalview.util.LinkedIdentityHashSet<SequenceI>();
+
     for (int i = 0; i < getHeight(); i++)
     {
-      currentSeq = getSequenceAt(i);
-      if (currentSeq.getDatasetSequence() != null)
-      {
-        seqs[i] = currentSeq.getDatasetSequence();
-      }
-      else
+      SequenceI currentSeq = getSequenceAt(i);
+      resolveAndAddDatasetSeq(currentSeq, seqs, true);
+    }
+
+    // verify all mappings are in dataset
+    for (AlignedCodonFrame cf : codonFrameList)
+    {
+      for (SequenceToSequenceMapping ssm : cf.getMappings())
       {
-        seqs[i] = currentSeq.createDatasetSequence();
+        if (!seqs.contains(ssm.getFromSeq()))
+        {
+          resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
+        }
+        if (!seqs.contains(ssm.getMapping().getTo()))
+        {
+          resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
+        }
       }
     }
-
-    dataset = new Alignment(seqs);
+    // finally construct dataset
+    dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
     // move mappings to the dataset alignment
     dataset.codonFrameList = this.codonFrameList;
     this.codonFrameList = null;
index f1db4c0..1d37fa6 100755 (executable)
@@ -108,11 +108,14 @@ public interface AlignmentI extends AnnotatedCollectionI
    * Used to set a particular index of the alignment with the given sequence.
    * 
    * @param i
-   *          Index of sequence to be updated.
+   *          Index of sequence to be updated. if i>=length, sequence will be
+   *          added to end, with no intervening positions.
    * @param seq
-   *          New sequence to be inserted.
+   *          New sequence to be inserted. The existing sequence at position i
+   *          will be replaced.
+   * @return existing sequence (or null if i>=current length)
    */
-  void setSequenceAt(int i, SequenceI seq);
+  SequenceI replaceSequenceAt(int i, SequenceI seq);
 
   /**
    * Deletes a sequence from the alignment
index a641b1b..11e77d8 100755 (executable)
@@ -22,9 +22,12 @@ package jalview.datamodel;
 
 import jalview.api.DBRefEntryI;
 
+import java.util.Arrays;
+
 public class DBRefEntry implements DBRefEntryI
 {
   String source = "", version = "", accessionId = "";
+
   /**
    * maps from associated sequence to the database sequence's coordinate system
    */
@@ -35,7 +38,6 @@ public class DBRefEntry implements DBRefEntryI
 
   }
 
-
   public DBRefEntry(String source, String version, String accessionId)
   {
     this(source, version, accessionId, null);
@@ -138,7 +140,8 @@ public class DBRefEntry implements DBRefEntryI
     String otherAccession = other.getAccessionId();
     if ((accessionId == null && otherAccession != null)
             || (accessionId != null && otherAccession == null)
-            || (accessionId != null && !accessionId.equalsIgnoreCase(otherAccession)))
+            || (accessionId != null && !accessionId
+                    .equalsIgnoreCase(otherAccession)))
     {
       return false;
     }
@@ -148,7 +151,7 @@ public class DBRefEntry implements DBRefEntryI
      * otherwise the versions have to match
      */
     String otherVersion = other.getVersion();
-      
+
     if ((version == null || version.equals("0") || version.endsWith(":0"))
             && otherVersion != null)
     {
@@ -223,28 +226,24 @@ public class DBRefEntry implements DBRefEntryI
     return accessionId;
   }
 
-
   @Override
   public void setAccessionId(String accessionId)
   {
     this.accessionId = accessionId;
   }
 
-
   @Override
   public void setSource(String source)
   {
     this.source = source;
   }
 
-
   @Override
   public void setVersion(String version)
   {
     this.version = version;
   }
 
-
   @Override
   public Mapping getMap()
   {
@@ -280,4 +279,53 @@ public class DBRefEntry implements DBRefEntryI
   {
     return getSrcAccString();
   }
+
+  /**
+   * Answers true if this reference looks like a primary reference for the
+   * sequence it is attached to. A reference is rejected if
+   * <ul>
+   * <li>it has a map with a 'to' sequence</li>
+   * <li>it has a map that is not an unshifted 1:1 mapping (from and to ratios
+   * both 1, identical lowest/highest positions and identical ranges)</li>
+   * <li>it has no version string</li>
+   * <li>its version string starts (ignoring case) with any of the source
+   * names returned by DBRefSource.allSources()</li>
+   * </ul>
+   * 
+   * @return true if none of the above conditions apply
+   */
+  @Override
+  public boolean isPrimary()
+  {
+    /*
+     * if a map is present, unless it is 1:1 and has no SequenceI mate, it
+     * cannot be a primary reference.
+     */
+    if (map != null)
+    {
+      if (map.getTo() != null)
+      {
+        return false;
+      }
+      if (map.getMap().getFromRatio() != map.getMap().getToRatio()
+              || map.getMap().getFromRatio() != 1)
+      {
+        return false;
+      }
+      // check map is really 1:1, no shifts allowed: ANY difference between
+      // the from and to coordinates disqualifies the reference. The ranges
+      // are lists of int[] so their contents must be compared with
+      // deepEquals (Arrays.equals would compare the int[] by identity).
+      if (map.getMap().getFromHighest() != map.getMap().getToHighest()
+              || map.getMap().getFromLowest() != map.getMap().getToLowest()
+              || !Arrays.deepEquals(
+                      map.getMap().getFromRanges().toArray(new int[0][]),
+                      map.getMap().getToRanges().toArray(new int[0][])))
+      {
+        return false;
+      }
+    }
+    if (version == null)
+    {
+      // no version string implies the reference has not been verified at all.
+      return false;
+    }
+    // tricky - this test really needs to search the sequence's set of dbrefs to
+    // see if there is a primary reference that derived this reference.
+    String ucv = version.toUpperCase();
+    for (String primsrc : DBRefSource.allSources())
+    {
+      if (ucv.startsWith(primsrc.toUpperCase()))
+      {
+        // by convention, many secondary references inherit the primary
+        // reference's source string as a prefix for any version information
+        // from the secondary reference.
+        return false;
+      }
+    }
+    return true;
+  }
 }
index fba9211..0ac14e5 100755 (executable)
  */
 package jalview.datamodel;
 
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Defines internal constants for unambiguous annotation of DbRefEntry source
  * strings and describing the data retrieved from external database sources (see
@@ -36,12 +40,12 @@ public class DBRefSource
   /**
    * UNIPROT Accession Number
    */
-  public static String UNIPROT = "UNIPROT";
+  public static final String UNIPROT = "UNIPROT";
 
   /**
    * UNIPROT Entry Name
    */
-  public static String UP_NAME = "UNIPROT_NAME".toUpperCase();
+  public static final String UP_NAME = "UNIPROT_NAME".toUpperCase();
 
   /**
    * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
@@ -54,27 +58,27 @@ public class DBRefSource
   /**
    * PDB Entry Code
    */
-  public static String PDB = "PDB";
+  public static final String PDB = "PDB";
 
   /**
    * EMBL ID
    */
-  public static String EMBL = "EMBL";
+  public static final String EMBL = "EMBL";
 
   /**
    * EMBLCDS ID
    */
-  public static String EMBLCDS = "EMBLCDS";
+  public static final String EMBLCDS = "EMBLCDS";
 
   /**
    * PFAM ID
    */
-  public static String PFAM = "PFAM";
+  public static final String PFAM = "PFAM";
 
   /**
    * RFAM ID
    */
-  public static String RFAM = "RFAM";
+  public static final String RFAM = "RFAM";
 
   /**
    * GeneDB ID
@@ -96,6 +100,25 @@ public class DBRefSource
 
   public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
 
-  public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB,
+  public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB,
       EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein
+
+  /**
+   * Collects, by reflection, the values of the public static String fields
+   * visible on this class (as reported by Class.getFields()). Fields of other
+   * types (e.g. the String[] groupings) and fields holding null are skipped.
+   * 
+   * @return the String values found (possibly empty, never null)
+   */
+  public static String[] allSources()
+  {
+    List<String> src = new ArrayList<String>();
+    for (Field f : DBRefSource.class.getFields())
+    {
+      // only static fields can be read with a null target object
+      if (String.class.equals(f.getType())
+              && java.lang.reflect.Modifier.isStatic(f.getModifiers()))
+      {
+        try
+        {
+          String value = (String) f.get(null);
+          if (value != null)
+          {
+            src.add(value);
+          }
+        } catch (IllegalAccessException x)
+        {
+          // not expected for public fields; report and carry on
+          x.printStackTrace();
+        }
+      }
+    }
+    return src.toArray(new String[0]);
+  }
 }
index a857712..2bbc278 100755 (executable)
@@ -22,6 +22,8 @@ package jalview.datamodel;
 
 import jalview.analysis.AlignSeq;
 import jalview.api.DBRefEntryI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
 import jalview.util.StringUtils;
 
 import java.util.ArrayList;
@@ -57,8 +59,6 @@ public class Sequence extends ASequence implements SequenceI
 
   String vamsasId;
 
-  DBRefEntryI sourceDBRef;
-
   DBRefEntry[] dbrefs;
 
   RNA rna;
@@ -235,8 +235,6 @@ public class Sequence extends ASequence implements SequenceI
             seq.getEnd());
     }
     description = seq.getDescription();
-    sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry(
-            seq.getSourceDBRef());
     if (seq != datasetSequence)
     {
       setDatasetSequence(seq.getDatasetSequence());
@@ -307,8 +305,9 @@ public class Sequence extends ASequence implements SequenceI
               && datasetSequence.getSequenceFeatures() != null
               && datasetSequence.getSequenceFeatures().length > 0)
       {
-        System.err
-              .println("Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment");
+        new Exception(
+                "Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment")
+                .printStackTrace();
       }
       datasetSequence.setSequenceFeatures(features);
     }
@@ -1394,12 +1393,15 @@ public class Sequence extends ASequence implements SequenceI
   @Override
   public PDBEntry getPDBEntry(String pdbIdStr)
   {
-    if (getDatasetSequence() == null
-            || getDatasetSequence().getAllPDBEntries() == null)
+    if (getDatasetSequence() != null)
+    {
+      return getDatasetSequence().getPDBEntry(pdbIdStr);
+    }
+    if (pdbIds == null)
     {
       return null;
     }
-    List<PDBEntry> entries = getDatasetSequence().getAllPDBEntries();
+    List<PDBEntry> entries = getAllPDBEntries();
     for (PDBEntry entry : entries)
     {
       if (entry.getId().equalsIgnoreCase(pdbIdStr))
@@ -1410,16 +1412,66 @@ public class Sequence extends ASequence implements SequenceI
     return null;
   }
 
-  @Override
-  public void setSourceDBRef(DBRefEntryI dbRef)
-  {
-    this.sourceDBRef = dbRef;
-  }
 
+  /**
+   * Returns the database references held on this sequence that qualify as
+   * primary references. If a dataset sequence is set, the call is delegated to
+   * it. Otherwise each entry of dbrefs is kept only if
+   * <ul>
+   * <li>its isPrimary() method returns true</li>
+   * <li>any map it holds covers at least this sequence's start..end range</li>
+   * <li>for a PDB source: getPDBEntry() finds an entry for the accession whose
+   * type is "PDB" (ignoring case)</li>
+   * <li>for any other source: DBRefUtils.selectDbRefs(), called with
+   * !isProtein(), returns the reference</li>
+   * </ul>
+   * 
+   * @return the qualifying references, or an empty list if there are none
+   */
   @Override
-  public DBRefEntryI getSourceDBRef()
+  public List<DBRefEntry> getPrimaryDBRefs()
   {
-    return this.sourceDBRef;
+    if (datasetSequence!=null)
+    {
+      return datasetSequence.getPrimaryDBRefs();
+    }
+    if (dbrefs==null || dbrefs.length==0)
+    {
+      return Arrays.asList(new DBRefEntry[0]);
+    }
+    // hold the lock on the dbrefs array while it is scanned
+    synchronized (dbrefs)
+    {
+      List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
+      // tmp is a one-element scratch array so that each reference can be
+      // passed to selectDbRefs on its own; res receives the result
+      DBRefEntry tmp[] = new DBRefEntry[1], res[] = null;
+      for (DBRefEntry ref : dbrefs)
+      {
+        if (!ref.isPrimary())
+        {
+          continue;
+        }
+        if (ref.hasMap())
+        {
+          MapList mp = ref.getMap().getMap();
+          if (mp.getFromLowest() > start || mp.getFromHighest() < end)
+          {
+            // map only involves a subsequence, so cannot be primary
+            continue;
+          }
+        }
+        // whilst it looks like it is a primary ref, we also sanity check type
+        if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals(
+                DBRefUtils.getCanonicalName(ref.getSource())))
+        {
+          // PDB dbrefs imply there should be a PDBEntry associated
+          // TODO: tighten PDB dbrefs so that they formally imply Jalview has
+          // actually downloaded and parsed the pdb file. That means there
+          // should be a cached file handle on the PDBEntry, and a real
+          // mapping between sequence and extracted sequence from PDB file
+          PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
+          if (pdbentry != null && pdbentry.getType() != null
+                  && pdbentry.getType().equalsIgnoreCase("PDB"))
+          {
+            primaries.add(ref);
+          }
+          // PDB refs are never passed to the generic source check below
+          continue;
+        }
+        // check standard protein or dna sources
+        tmp[0] = ref;
+        res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
+        // keep the reference only if selectDbRefs handed it back
+        if (res != null && res[0] == tmp[0])
+        {
+          primaries.add(ref);
+          continue;
+        }
+      }
+      return primaries;
+    }
   }
 
 }
index 45a767c..ec7520b 100755 (executable)
@@ -20,8 +20,6 @@
  */
 package jalview.datamodel;
 
-import jalview.api.DBRefEntryI;
-
 import java.util.List;
 import java.util.Vector;
 
@@ -443,21 +441,14 @@ public interface SequenceI extends ASequenceI
    */
   public PDBEntry getPDBEntry(String pdbId);
 
-  /**
-   * Set the distinct source database, and accession number from which a
-   * sequence and its start-end data were derived from. This is very important
-   * for SIFTS mappings and must be set prior to performing SIFTS mapping.
-   * 
-   * @param dbRef
-   *          the source dbRef for the sequence
-   */
-  public void setSourceDBRef(DBRefEntryI dbRef);
 
   /**
-   * Get the distinct source database, and accession number from which a
-   * sequence and its start-end data were derived from.
+   * Get all primary database/accessions for this sequence's data. These
+   * DBRefEntry are expected to resolve to a valid record in the associated
+   * external database, either directly or via a provided 1:1 Mapping.
    * 
-   * @return
+   * @return just the primary references (if any) for this sequence, or an empty
+   *         list
    */
-  public DBRefEntryI getSourceDBRef();
+  public List<DBRefEntry> getPrimaryDBRefs();
 }
index 06e929d..3ba36ca 100644 (file)
@@ -195,7 +195,6 @@ public class EmblEntry
     DBRefEntry retrievedref = new DBRefEntry(sourceDb,
             getSequenceVersion(), accession);
     dna.addDBRef(retrievedref);
-    dna.setSourceDBRef(retrievedref);
     // add map to indicate the sequence is a valid coordinate frame for the
     // dbref
     retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
@@ -504,7 +503,6 @@ public class EmblEntry
             dnaToProteinMapping.setTo(proteinSeq);
             dnaToProteinMapping.setMappedFromId(proteinId);
             proteinSeq.addDBRef(proteinDbRef);
-            proteinSeq.setSourceDBRef(proteinDbRef);
             ref.setMap(dnaToProteinMapping);
           }
           hasUniprotDbref = true;
@@ -549,7 +547,6 @@ public class EmblEntry
                 DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
       }
       product.addDBRef(proteinToEmblProteinRef);
-      product.setSourceDBRef(proteinToEmblProteinRef);
 
       if (dnaToProteinMapping != null
               && dnaToProteinMapping.getTo() != null)
index b4d2783..50e1032 100644 (file)
@@ -174,7 +174,8 @@ public class EnsemblGene extends EnsemblSeqProxy
        */
       else
       {
-        List<String> ids = new EnsemblSymbol(getDomain()).getIds(acc);
+        List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
+                getDbVersion()).getIds(acc);
         for (String geneId : ids)
         {
           if (!geneIds.contains(geneId))
@@ -196,7 +197,8 @@ public class EnsemblGene extends EnsemblSeqProxy
    */
   protected String getGeneIdentifiersForName(String query)
   {
-    List<String> ids = new EnsemblSymbol(getDomain()).getIds(query);
+    List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
+            getDbVersion()).getIds(query);
     if (ids != null)
     {
       for (String id : ids)
index cc002e1..5fccedd 100644 (file)
@@ -276,8 +276,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       {
         // clunky: ensure Uniprot xref if we have one is on mapped sequence
         SequenceI ds = proteinSeq.getDatasetSequence();
-        ds.setSourceDBRef(proteinSeq.getSourceDBRef());
-
+        // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
         Mapping map = new Mapping(ds, mapList);
         DBRefEntry dbr = new DBRefEntry(getDbSource(),
                 getEnsemblDataVersion(), proteinSeq.getName(), map);
@@ -309,7 +308,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       seq = seq.getDatasetSequence();
     }
 
-    EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
+    EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(),
+            getEnsemblDataVersion());
     List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName());
     for (DBRefEntry xref : xrefs)
     {
@@ -322,7 +322,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     DBRefEntry self = new DBRefEntry(getDbSource(),
             getEnsemblDataVersion(), seq.getName());
     seq.addDBRef(self);
-    seq.setSourceDBRef(self);
   }
 
   /**
@@ -382,7 +381,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         {
           DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
                   getEnsemblDataVersion(), name);
-          sq.setSourceDBRef(dbref);
+          sq.addDBRef(dbref);
         }
       }
       if (alignment == null)
index 1c47f11..b8c8c54 100644 (file)
@@ -25,11 +25,13 @@ public class EnsemblSymbol extends EnsemblXref
   /**
    * Constructor given the target domain to fetch data from
    * 
-   * @param d
+   * @param domain
+   *          the target domain to fetch data from
+   * @param dbName
+   *          database source name, passed on to the EnsemblXref constructor
+   * @param dbVersion
+   *          database version, passed on to the EnsemblXref constructor
    */
-  public EnsemblSymbol(String d)
+  public EnsemblSymbol(String domain, String dbName, String dbVersion)
   {
-    super(d);
+    super(domain, dbName, dbVersion);
   }
 
   /**
index fa86865..313572f 100644 (file)
@@ -29,20 +29,25 @@ class EnsemblXref extends EnsemblRestClient
 
   private static final String GO_GENE_ONTOLOGY = "GO";
 
+  private String dbName = "ENSEMBL (xref)";
+
   /**
    * Constructor given the target domain to fetch data from
    * 
    * @param d
+   *          the target domain to fetch data from
+   * @param dbSource
+   *          database name; stored in dbName (returned by getDbName()) and
+   *          used as the prefix of the cross-reference version string
+   * @param version
+   *          version appended, after a ':', to dbSource to form the
+   *          cross-reference version string
    */
-  public EnsemblXref(String d)
+  public EnsemblXref(String d, String dbSource, String version)
   {
     super(d);
+    dbName = dbSource;
+    xrefVersion = dbSource + ":" + version;
+    
   }
 
+  /**
+   * Returns the database name held in the dbName field, which the
+   * constructor sets from its dbSource argument
+   */
   @Override
   public String getDbName()
   {
-    return "ENSEMBL (xref)";
+    return dbName;
   }
 
   @Override
@@ -152,7 +157,7 @@ class EnsemblXref extends EnsemblRestClient
         if (dbName != null && id != null)
         {
           dbName = DBRefUtils.getCanonicalName(dbName);
-          DBRefEntry dbref = new DBRefEntry(dbName, "0", id);
+          DBRefEntry dbref = new DBRefEntry(dbName, getXRefVersion(), id);
           result.add(dbref);
         }
       }
@@ -163,6 +168,18 @@ class EnsemblXref extends EnsemblRestClient
     return result;
   }
 
+  private String xrefVersion = "ENSEMBL:0";
+
+  /**
+   * Returns the version string given to cross-references created by this
+   * client. The field is initialised to "ENSEMBL:0" and then set by the
+   * constructor to its dbSource and version arguments joined by ':'.
+   * 
+   * @return the cross-reference version string
+   */
+  public String getXRefVersion()
+  {
+    return xrefVersion;
+  }
+
   /**
    * Returns the URL for the REST endpoint to fetch all cross-references for an
    * identifier. Note this may return protein cross-references for nucleotide.
index f6268c0..dd8fb7a 100644 (file)
@@ -32,7 +32,6 @@ import jalview.api.AlignViewControllerI;
 import jalview.api.AlignViewportI;
 import jalview.api.AlignmentViewPanel;
 import jalview.api.FeatureSettingsControllerI;
-import jalview.api.FeatureSettingsModelI;
 import jalview.api.SplitContainerI;
 import jalview.api.ViewStyleI;
 import jalview.api.analysis.ScoreModelI;
@@ -54,7 +53,6 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.AlignmentOrder;
 import jalview.datamodel.AlignmentView;
 import jalview.datamodel.ColumnSelection;
-import jalview.datamodel.DBRefSource;
 import jalview.datamodel.HiddenSequences;
 import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SeqCigar;
@@ -74,7 +72,6 @@ import jalview.io.JalviewFileView;
 import jalview.io.JnetAnnotationMaker;
 import jalview.io.NewickFile;
 import jalview.io.TCoffeeScoreFile;
-import jalview.io.gff.SequenceOntologyI;
 import jalview.jbgui.GAlignFrame;
 import jalview.schemes.Blosum62ColourScheme;
 import jalview.schemes.BuriedColourScheme;
@@ -94,12 +91,10 @@ import jalview.schemes.TaylorColourScheme;
 import jalview.schemes.TurnColourScheme;
 import jalview.schemes.UserColourScheme;
 import jalview.schemes.ZappoColourScheme;
-import jalview.structure.StructureSelectionManager;
 import jalview.util.MessageManager;
 import jalview.viewmodel.AlignmentViewport;
 import jalview.ws.DBRefFetcher;
 import jalview.ws.DBRefFetcher.FetchFinishedListenerI;
-import jalview.ws.SequenceFetcher;
 import jalview.ws.jws1.Discoverer;
 import jalview.ws.jws2.Jws2Discoverer;
 import jalview.ws.jws2.jabaws2.Jws2Instance;
@@ -4676,236 +4671,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
   protected void showProductsFor(final SequenceI[] sel,
           final boolean _odna, final String source)
   {
-    Runnable foo = new Runnable()
-    {
-
-      @Override
-      public void run()
-      {
-        final long sttime = System.currentTimeMillis();
-        AlignFrame.this.setProgressBar(MessageManager.formatMessage(
-                "status.searching_for_sequences_from",
-                new Object[] { source }), sttime);
-        try
-        {
-          AlignmentI alignment = AlignFrame.this.getViewport()
-                  .getAlignment();
-          AlignmentI dataset = alignment.getDataset() == null ? alignment
-                  : alignment.getDataset();
-          boolean dna = alignment.isNucleotide();
-          if (_odna != dna)
-          {
-            System.err
-                    .println("Conflict: showProducts for alignment originally "
-                            + "thought to be "
-                            + (_odna ? "DNA" : "Protein")
-                            + " now searching for "
-                            + (dna ? "DNA" : "Protein") + " Context.");
-          }
-          AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences(
-                  source, dna);
-          if (xrefs == null)
-          {
-            return;
-          }
-          /*
-           * get display scheme (if any) to apply to features
-           */
-          FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
-                  .getFeatureColourScheme(source);
-
-          AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
-                  xrefs);
-          if (!dna)
-          {
-            xrefsAlignment = AlignmentUtils.makeCdsAlignment(
-                    xrefsAlignment.getSequencesArray(), dataset, sel);
-            xrefsAlignment.alignAs(alignment);
-          }
-
-          /*
-           * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
-           * sequences). If we are DNA, drop introns and update mappings
-           */
-          AlignmentI copyAlignment = null;
-
-          if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
-          {
-            boolean copyAlignmentIsAligned = false;
-            if (dna)
-            {
-              copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
-                      xrefsAlignment.getSequencesArray());
-              if (copyAlignment.getHeight() == 0)
-              {
-                JOptionPane.showMessageDialog(AlignFrame.this,
-                        MessageManager.getString("label.cant_map_cds"),
-                        MessageManager.getString("label.operation_failed"),
-                        JOptionPane.OK_OPTION);
-                System.err.println("Failed to make CDS alignment");
-              }
-
-              /*
-               * pending getting Embl transcripts to 'align', 
-               * we are only doing this for Ensembl
-               */
-              // TODO proper criteria for 'can align as cdna'
-              if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
-                      || AlignmentUtils.looksLikeEnsembl(alignment))
-              {
-                copyAlignment.alignAs(alignment);
-                copyAlignmentIsAligned = true;
-              }
-            }
-            else
-            {
-              copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
-                      xrefs.getSequencesArray(), dataset);
-            }
-            copyAlignment.setGapCharacter(AlignFrame.this.viewport
-                    .getGapCharacter());
-
-            StructureSelectionManager ssm = StructureSelectionManager
-                    .getStructureSelectionManager(Desktop.instance);
-
-            /*
-             * register any new mappings for sequence mouseover etc
-             * (will not duplicate any previously registered mappings)
-             */
-            ssm.registerMappings(dataset.getCodonFrames());
-
-            if (copyAlignment.getHeight() <= 0)
-            {
-              System.err.println("No Sequences generated for xRef type "
-                      + source);
-              return;
-            }
-            /*
-             * align protein to dna
-             */
-            if (dna && copyAlignmentIsAligned)
-            {
-              xrefsAlignment.alignAs(copyAlignment);
-            }
-            else
-            {
-              /*
-               * align cdna to protein - currently only if 
-               * fetching and aligning Ensembl transcripts!
-               */
-              // TODO: generalise for other sources of locus/transcript/cds data
-              if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
-              {
-                copyAlignment.alignAs(xrefsAlignment);
-              }
-            }
-          }
-          /*
-           * build AlignFrame(s) according to available alignment data
-           */
-          AlignFrame newFrame = new AlignFrame(xrefsAlignment,
-                  DEFAULT_WIDTH, DEFAULT_HEIGHT);
-          if (Cache.getDefault("HIDE_INTRONS", true))
-          {
-            newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
-          }
-          String newtitle = String.format("%s %s %s",
-                  dna ? MessageManager.getString("label.proteins")
-                          : MessageManager.getString("label.nucleotides"),
-                  MessageManager.getString("label.for"), getTitle());
-          newFrame.setTitle(newtitle);
-
-          if (copyAlignment == null)
-          {
-            /*
-             * split frame display is turned off in preferences file
-             */
-            Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
-                    DEFAULT_HEIGHT);
-            return; // via finally clause
-          }
-          AlignFrame copyThis = new AlignFrame(copyAlignment,
-                  AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
-          copyThis.setTitle(AlignFrame.this.getTitle());
-
-          boolean showSequenceFeatures = viewport.isShowSequenceFeatures();
-          newFrame.setShowSeqFeatures(showSequenceFeatures);
-          copyThis.setShowSeqFeatures(showSequenceFeatures);
-          FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
-                  .getFeatureRenderer();
-
-          /*
-           * copy feature rendering settings to split frame
-           */
-          newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
-                  .transferSettings(myFeatureStyling);
-          copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
-                  .transferSettings(myFeatureStyling);
-
-          /*
-           * apply 'database source' feature configuration
-           * if any was found
-           */
-          // TODO is this the feature colouring for the original
-          // alignment or the fetched xrefs? either could be Ensembl
-          newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
-          copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
-
-          SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
-                  dna ? newFrame : copyThis);
-          newFrame.setVisible(true);
-          copyThis.setVisible(true);
-          String linkedTitle = MessageManager
-                  .getString("label.linked_view_title");
-          Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
-          sf.adjustDivider();
-        } catch (OutOfMemoryError e)
-        {
-          new OOMWarning("whilst fetching crossreferences", e);
-        } catch (Throwable e)
-        {
-          Cache.log.error("Error when finding crossreferences", e);
-        } finally
-        {
-          AlignFrame.this.setProgressBar(MessageManager.formatMessage(
-                  "status.finished_searching_for_sequences_from",
-                  new Object[] { source }), sttime);
-        }
-      }
-
-      /**
-       * Makes an alignment containing the given sequences, and adds them to the
-       * given dataset, which is also set as the dataset for the new alignment
-       * 
-       * TODO: refactor to DatasetI method
-       * 
-       * @param dataset
-       * @param seqs
-       * @return
-       */
-      protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
-              AlignmentI seqs)
-      {
-        SequenceI[] sprods = new SequenceI[seqs.getHeight()];
-        for (int s = 0; s < sprods.length; s++)
-        {
-          sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
-          if (dataset.getSequences() == null
-                  || !dataset.getSequences().contains(
-                          sprods[s].getDatasetSequence()))
-          {
-            dataset.addSequence(sprods[s].getDatasetSequence());
-          }
-          sprods[s].updatePDBIds();
-        }
-        Alignment al = new Alignment(sprods);
-        al.setDataset(dataset);
-        return al;
-      }
-
-    };
-    Thread frunner = new Thread(foo);
-    frunner.start();
+    new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this))
+            .start();
   }
 
   /**
diff --git a/src/jalview/gui/CrossRefAction.java b/src/jalview/gui/CrossRefAction.java
new file mode 100644 (file)
index 0000000..32af226
--- /dev/null
@@ -0,0 +1,312 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import jalview.analysis.AlignmentUtils;
+import jalview.analysis.CrossRef;
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureSettingsModelI;
+import jalview.bin.Cache;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.structure.StructureSelectionManager;
+import jalview.util.MessageManager;
+import jalview.ws.SequenceFetcher;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.JOptionPane;
+
+/**
+ * Runnable which discovers cross-references from a given source database for
+ * a set of selected sequences and displays them, either in a new alignment
+ * window or (when split frames are enabled in preferences) in a split frame
+ * alongside a copy of the selection. Instances are obtained from
+ * {@link #showProductsFor(SequenceI[], boolean, String, AlignFrame)} or the
+ * constructor, and are intended to be handed to a Thread.
+ * 
+ * @author jprocter
+ *
+ */
+public class CrossRefAction implements Runnable
+{
+  /** the frame whose alignment is searched and whose progress bar is used */
+  private final AlignFrame alignFrame;
+
+  /** the sequences to find cross-references for */
+  private final SequenceI[] sel;
+
+  /**
+   * whether the caller believed the alignment to be nucleotide when the action
+   * was created; only used to report a conflict in run()
+   */
+  private final boolean _odna;
+
+  /** name of the database to find cross-references to */
+  private final String source;
+
+  /*
+   * views created by run(), in the order they were added.
+   * NOTE(review): written by the thread executing run() with no
+   * synchronisation - presumably callers only read this after run() has
+   * finished; confirm against callers.
+   */
+  List<AlignmentViewPanel> xrefViews = new ArrayList<AlignmentViewPanel>();
+
+  /**
+   * Returns the (live) list of alignment panels created by {@link #run()}: a
+   * single panel when split frames are disabled, otherwise the top then the
+   * bottom panel of the split frame. Empty if nothing was displayed.
+   * 
+   * @return the list of views added so far
+   */
+  public List<AlignmentViewPanel> getXrefViews()
+  {
+    return xrefViews;
+  }
+
+  /**
+   * Finds cross-referenced sequences from the source database for the
+   * selected sequences, builds the alignment(s) to display, and opens them on
+   * the desktop. The originating frame's progress bar is set before the
+   * search and updated again in the finally clause, however the method exits.
+   */
+  @Override
+  public void run()
+  {
+    final long sttime = System.currentTimeMillis();
+    alignFrame.setProgressBar(
+            MessageManager.formatMessage(
+                    "status.searching_for_sequences_from",
+                    new Object[] { source }), sttime);
+    try
+    {
+      AlignmentI alignment = alignFrame.getViewport().getAlignment();
+      // fall back to the alignment itself if it has no separate dataset
+      AlignmentI dataset = alignment.getDataset() == null ? alignment
+              : alignment.getDataset();
+      boolean dna = alignment.isNucleotide();
+      if (_odna != dna)
+      {
+        // report, but carry on using the alignment's current type
+        System.err
+                .println("Conflict: showProducts for alignment originally "
+                        + "thought to be " + (_odna ? "DNA" : "Protein")
+                        + " now searching for " + (dna ? "DNA" : "Protein")
+                        + " Context.");
+      }
+      AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences(
+              source, dna);
+      if (xrefs == null)
+      {
+        return;
+      }
+      /*
+       * get display scheme (if any) to apply to features
+       */
+      FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
+              .getFeatureColourScheme(source);
+
+      AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
+              xrefs);
+      if (!dna)
+      {
+        xrefsAlignment = AlignmentUtils.makeCdsAlignment(
+                xrefsAlignment.getSequencesArray(), dataset, sel);
+        xrefsAlignment.alignAs(alignment);
+      }
+
+      /*
+       * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
+       * sequences). If we are DNA, drop introns and update mappings
+       */
+      AlignmentI copyAlignment = null;
+
+      if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+      {
+        boolean copyAlignmentIsAligned = false;
+        if (dna)
+        {
+          copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+                  xrefsAlignment.getSequencesArray());
+          if (copyAlignment.getHeight() == 0)
+          {
+            // warn the user; the empty alignment is rejected further below
+            JOptionPane.showMessageDialog(alignFrame,
+                    MessageManager.getString("label.cant_map_cds"),
+                    MessageManager.getString("label.operation_failed"),
+                    JOptionPane.ERROR_MESSAGE);
+            System.err.println("Failed to make CDS alignment");
+          }
+
+          /*
+           * pending getting Embl transcripts to 'align', 
+           * we are only doing this for Ensembl
+           */
+          // TODO proper criteria for 'can align as cdna'
+          if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+                  || AlignmentUtils.looksLikeEnsembl(alignment))
+          {
+            copyAlignment.alignAs(alignment);
+            copyAlignmentIsAligned = true;
+          }
+        }
+        else
+        {
+          copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+                  xrefs.getSequencesArray(), dataset);
+        }
+        copyAlignment
+                .setGapCharacter(alignFrame.viewport.getGapCharacter());
+
+        StructureSelectionManager ssm = StructureSelectionManager
+                .getStructureSelectionManager(Desktop.instance);
+
+        /*
+         * register any new mappings for sequence mouseover etc
+         * (will not duplicate any previously registered mappings)
+         */
+        ssm.registerMappings(dataset.getCodonFrames());
+
+        if (copyAlignment.getHeight() <= 0)
+        {
+          System.err.println("No Sequences generated for xRef type "
+                  + source);
+          return;
+        }
+        /*
+         * align protein to dna
+         */
+        if (dna && copyAlignmentIsAligned)
+        {
+          xrefsAlignment.alignAs(copyAlignment);
+        }
+        else
+        {
+          /*
+           * align cdna to protein - currently only if 
+           * fetching and aligning Ensembl transcripts!
+           */
+          // TODO: generalise for other sources of locus/transcript/cds data
+          if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
+          {
+            copyAlignment.alignAs(xrefsAlignment);
+          }
+        }
+      }
+      /*
+       * build AlignFrame(s) according to available alignment data
+       */
+      AlignFrame newFrame = new AlignFrame(xrefsAlignment,
+              AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+      if (Cache.getDefault("HIDE_INTRONS", true))
+      {
+        newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
+      }
+      // the new frame shows the opposite sequence type to the original
+      String newtitle = String.format("%s %s %s",
+              dna ? MessageManager.getString("label.proteins")
+                      : MessageManager.getString("label.nucleotides"),
+              MessageManager.getString("label.for"), alignFrame.getTitle());
+      newFrame.setTitle(newtitle);
+
+      if (copyAlignment == null)
+      {
+        /*
+         * split frame display is turned off in preferences file
+         */
+        Desktop.addInternalFrame(newFrame, newtitle,
+                AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+        xrefViews.add(newFrame.alignPanel);
+        return; // via finally clause
+      }
+      AlignFrame copyThis = new AlignFrame(copyAlignment,
+              AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+      copyThis.setTitle(alignFrame.getTitle());
+
+      boolean showSequenceFeatures = alignFrame.getViewport()
+              .isShowSequenceFeatures();
+      newFrame.setShowSeqFeatures(showSequenceFeatures);
+      copyThis.setShowSeqFeatures(showSequenceFeatures);
+      FeatureRenderer myFeatureStyling = alignFrame.alignPanel
+              .getSeqPanel().seqCanvas.getFeatureRenderer();
+
+      /*
+       * copy feature rendering settings to split frame
+       */
+      newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+              .transferSettings(myFeatureStyling);
+      copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+              .transferSettings(myFeatureStyling);
+
+      /*
+       * apply 'database source' feature configuration
+       * if any was found
+       */
+      // TODO is this the feature colouring for the original
+      // alignment or the fetched xrefs? either could be Ensembl
+      newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
+      copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
+
+      // first argument is the copy of the original when that is nucleotide,
+      // otherwise the newly fetched sequences
+      SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
+              dna ? newFrame : copyThis);
+      newFrame.setVisible(true);
+      copyThis.setVisible(true);
+      String linkedTitle = MessageManager
+              .getString("label.linked_view_title");
+      Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+      sf.adjustDivider();
+
+      // finally add the top, then bottom frame to the view list
+      xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
+      xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
+
+    } catch (OutOfMemoryError e)
+    {
+      new OOMWarning("whilst fetching crossreferences", e);
+    } catch (Throwable e)
+    {
+      Cache.log.error("Error when finding crossreferences", e);
+    } finally
+    {
+      alignFrame.setProgressBar(MessageManager.formatMessage(
+              "status.finished_searching_for_sequences_from",
+              new Object[] { source }), sttime);
+    }
+  }
+
+  /**
+   * Makes an alignment containing sequences derived from the given sequences,
+   * and adds their dataset sequences to the given dataset (if not already
+   * present), which is also set as the dataset for the new alignment
+   * 
+   * TODO: refactor to DatasetI method
+   * 
+   * @param dataset
+   *          the dataset to add any new dataset sequences to
+   * @param seqs
+   *          the sequences to derive the new alignment's sequences from
+   * @return a new alignment of the derived sequences, with the given dataset
+   */
+  protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
+          AlignmentI seqs)
+  {
+    SequenceI[] sprods = new SequenceI[seqs.getHeight()];
+    for (int s = 0; s < sprods.length; s++)
+    {
+      sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
+      SequenceI dsSeq = sprods[s].getDatasetSequence();
+      if (dataset.getSequences() == null
+              || !dataset.getSequences().contains(dsSeq))
+      {
+        dataset.addSequence(dsSeq);
+      }
+      sprods[s].updatePDBIds();
+    }
+    Alignment al = new Alignment(sprods);
+    al.setDataset(dataset);
+    return al;
+  }
+
+  /**
+   * Constructor; no work is done until {@link #run()} is called
+   * 
+   * @param alignFrame
+   *          the frame holding the alignment to find cross-references for
+   * @param sel
+   *          the sequences to find cross-references for
+   * @param _odna
+   *          true if the caller believes the alignment is nucleotide
+   * @param source
+   *          the database to find cross-references to
+   */
+  public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel,
+          boolean _odna, String source)
+  {
+    this.alignFrame = alignFrame;
+    this.sel = sel;
+    this._odna = _odna;
+    this.source = source;
+  }
+
+  /**
+   * Factory method returning a new, not yet started, action for the given
+   * parameters (note the different argument order from the constructor)
+   * 
+   * @param sel
+   *          the sequences to find cross-references for
+   * @param _odna
+   *          true if the caller believes the alignment is nucleotide
+   * @param source
+   *          the database to find cross-references to
+   * @param alignFrame
+   *          the frame holding the alignment to find cross-references for
+   * @return a new CrossRefAction
+   */
+  public static CrossRefAction showProductsFor(final SequenceI[] sel,
+          final boolean _odna, final String source,
+          final AlignFrame alignFrame)
+  {
+    return new CrossRefAction(alignFrame, sel, _odna, source);
+  }
+
+}
index 68245b6..c80f3de 100644 (file)
@@ -365,6 +365,12 @@ public class Jalview2XML
       public jalview.datamodel.Mapping mp = _jmap;
 
       @Override
+      public boolean isResolvable()
+      {
+        return super.isResolvable() && mp.getTo() != null;
+      };
+
+      @Override
       boolean resolve()
       {
         SequenceI seq = getSrefDatasetSeq();
@@ -787,37 +793,42 @@ public class Jalview2XML
 
     JSeq jseq;
     Set<String> calcIdSet = new HashSet<String>();
-
+    // record the set of vamsas sequence XML POJO we create.
+    HashMap<String,Sequence> vamsasSetIds = new HashMap<String,Sequence>(); 
     // SAVE SEQUENCES
     for (final SequenceI jds : rjal.getSequences())
     {
       final SequenceI jdatasq = jds.getDatasetSequence() == null ? jds
               : jds.getDatasetSequence();
       String id = seqHash(jds);
-
-      if (seqRefIds.get(id) != null)
-      {
-        // This happens for two reasons: 1. multiple views are being serialised.
-        // 2. the hashCode has collided with another sequence's code. This DOES
-        // HAPPEN! (PF00072.15.stk does this)
-        // JBPNote: Uncomment to debug writing out of files that do not read
-        // back in due to ArrayOutOfBoundExceptions.
-        // System.err.println("vamsasSeq backref: "+id+"");
-        // System.err.println(jds.getName()+"
-        // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString());
-        // System.err.println("Hashcode: "+seqHash(jds));
-        // SequenceI rsq = (SequenceI) seqRefIds.get(id + "");
-        // System.err.println(rsq.getName()+"
-        // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString());
-        // System.err.println("Hashcode: "+seqHash(rsq));
-      }
-      else
-      {
-        vamsasSeq = createVamsasSequence(id, jds);
-        vamsasSet.addSequence(vamsasSeq);
-        seqRefIds.put(id, jds);
+      if (vamsasSetIds.get(id) == null)
+      {
+        if (seqRefIds.get(id) != null && !storeDS)
+        {
+          // This happens for two reasons: 1. multiple views are being
+          // serialised.
+          // 2. the hashCode has collided with another sequence's code. This
+          // DOES
+          // HAPPEN! (PF00072.15.stk does this)
+          // JBPNote: Uncomment to debug writing out of files that do not read
+          // back in due to ArrayOutOfBoundExceptions.
+          // System.err.println("vamsasSeq backref: "+id+"");
+          // System.err.println(jds.getName()+"
+          // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString());
+          // System.err.println("Hashcode: "+seqHash(jds));
+          // SequenceI rsq = (SequenceI) seqRefIds.get(id + "");
+          // System.err.println(rsq.getName()+"
+          // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString());
+          // System.err.println("Hashcode: "+seqHash(rsq));
+        }
+        else
+        {
+          vamsasSeq = createVamsasSequence(id, jds);
+          vamsasSet.addSequence(vamsasSeq);
+          vamsasSetIds.put(id, vamsasSeq);
+          seqRefIds.put(id, jds);
+        }
       }
-
       jseq = new JSeq();
       jseq.setStart(jds.getStart());
       jseq.setEnd(jds.getEnd());
@@ -2808,15 +2819,28 @@ public class Jalview2XML
           { 
             System.err
                     .println("Warning JAL-2154 regression: updating start/end for sequence "
-                    + tmpSeq.toString());
+                            + tmpSeq.toString() + " to " + jseqs[i]);
           }
         } else {
           incompleteSeqs.remove(seqId);
         }
+        if (vamsasSeq.length > vi && vamsasSeq[vi].getId().equals(seqId))
+        {
+          // most likely we are reading a dataset XML document so
+          // update from vamsasSeq section of XML for this sequence
+          tmpSeq.setName(vamsasSeq[vi].getName());
+          tmpSeq.setDescription(vamsasSeq[vi].getDescription());
+          tmpSeq.setSequence(vamsasSeq[vi].getSequence());
+          vi++;
+        }
+        else
+        {
+          // reading multiple views, so vamsasSeq set is a subset of JSeq
+          multipleView = true;
+        }
         tmpSeq.setStart(jseqs[i].getStart());
         tmpSeq.setEnd(jseqs[i].getEnd());
         tmpseqs.add(tmpSeq);
-        multipleView = true;
       }
       else
       {
@@ -2905,6 +2929,12 @@ public class Jalview2XML
     {
       // load sequence features, database references and any associated PDB
       // structures for the alignment
+      //
+      // prior to 2.10, this part would only be executed the first time a
+      // sequence was encountered, but not afterwards.
+      // now, for 2.10 projects, this is also done if the xml doc includes
+      // dataset sequences not actually present in any particular view.
+      //
       for (int i = 0; i < vamsasSeq.length; i++)
       {
         if (jseqs[i].getFeaturesCount() > 0)
@@ -2931,13 +2961,17 @@ public class Jalview2XML
               }
 
             }
-
-            al.getSequenceAt(i).getDatasetSequence().addSequenceFeature(sf);
+            // adds feature to datasequence's feature set (since Jalview 2.10)
+            al.getSequenceAt(i).addSequenceFeature(sf);
           }
         }
         if (vamsasSeq[i].getDBRefCount() > 0)
         {
-          addDBRefs(al.getSequenceAt(i).getDatasetSequence(), vamsasSeq[i]);
+          // adds dbrefs to datasequence's set (since Jalview 2.10)
+          addDBRefs(
+                  al.getSequenceAt(i).getDatasetSequence() == null ? al.getSequenceAt(i)
+                          : al.getSequenceAt(i).getDatasetSequence(),
+                  vamsasSeq[i]);
         }
         if (jseqs[i].getPdbidsCount() > 0)
         {
@@ -2970,7 +3004,15 @@ public class Jalview2XML
             }
             StructureSelectionManager.getStructureSelectionManager(
                     Desktop.instance).registerPDBEntry(entry);
-            al.getSequenceAt(i).getDatasetSequence().addPDBId(entry);
+            // adds PDBEntry to datasequence's set (since Jalview 2.10)
+            if (al.getSequenceAt(i).getDatasetSequence() != null)
+            {
+              al.getSequenceAt(i).getDatasetSequence().addPDBId(entry);
+            }
+            else
+            {
+              al.getSequenceAt(i).addPDBId(entry);
+            }
           }
         }
       }
@@ -2999,16 +3041,16 @@ public class Jalview2XML
             if (maps[m].getMapping() != null)
             {
               mapping = addMapping(maps[m].getMapping());
-            }
-            if (dnaseq != null && mapping.getTo() != null)
-            {
-              cf.addMap(dnaseq, mapping.getTo(), mapping.getMap());
-            }
-            else
-            {
-              // defer to later
-              frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf,
-                      mapping));
+              if (dnaseq != null && mapping.getTo() != null)
+              {
+                cf.addMap(dnaseq, mapping.getTo(), mapping.getMap());
+              }
+              else
+              {
+                // defer to later
+                frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf,
+                        mapping));
+              }
             }
           }
           al.addCodonFrame(cf);
@@ -4898,7 +4940,7 @@ public class Jalview2XML
     for (int i = 0, iSize = vamsasSet.getSequenceCount(); i < iSize; i++)
     {
       Sequence vamsasSeq = vamsasSet.getSequence(i);
-      ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed);
+      ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed, i);
     }
     // create a new dataset
     if (ds == null)
@@ -4925,18 +4967,29 @@ public class Jalview2XML
    *          dataset alignment
    * @param dseqs
    *          vector to add new dataset sequence to
+   * @param ignoreUnrefed
+   *          - when true, don't create new sequences from vamsasSeq if its id
+   *          doesn't already have an associated Jalview sequence.
+   * @param vseqpos
+   *          - used to reorder the sequence in the alignment according to the
+   *          vamsasSeq array ordering, to preserve ordering of dataset
    */
   private void ensureJalviewDatasetSequence(Sequence vamsasSeq,
-          AlignmentI ds, Vector dseqs, boolean ignoreUnrefed)
+          AlignmentI ds, Vector dseqs, boolean ignoreUnrefed, int vseqpos)
   {
     // JBP TODO: Check this is called for AlCodonFrames to support recovery of
     // xRef Codon Maps
     SequenceI sq = seqRefIds.get(vamsasSeq.getId());
+    boolean reorder = false;
     SequenceI dsq = null;
     if (sq != null && sq.getDatasetSequence() != null)
     {
       dsq = sq.getDatasetSequence();
     }
+    else
+    {
+      reorder = true;
+    }
     if (sq == null && ignoreUnrefed)
     {
       return;
@@ -5032,6 +5085,35 @@ public class Jalview2XML
         // + (post ? "appended" : ""));
       }
     }
+    else
+    {
+      // sequence refs are identical. We may need to update the existing dataset
+      // alignment with this one, though.
+      if (ds != null && dseqs == null)
+      {
+        int opos = ds.findIndex(dsq);
+        SequenceI tseq = null;
+        if (opos != -1 && vseqpos != opos)
+        {
+          // remove from old position
+          ds.deleteSequence(dsq);
+        }
+        if (vseqpos < ds.getHeight())
+        {
+          if (vseqpos != opos)
+          {
+            // save sequence at destination position
+            tseq = ds.getSequenceAt(vseqpos);
+            ds.replaceSequenceAt(vseqpos, dsq);
+            ds.addSequence(tseq);
+          }
+        }
+        else
+        {
+          ds.addSequence(dsq);
+        }
+      }
+    }
   }
 
   /*
index afe6754..5d4ea68 100755 (executable)
@@ -43,6 +43,7 @@ import java.awt.event.KeyAdapter;
 import java.awt.event.KeyEvent;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 
@@ -200,8 +201,19 @@ public class SequenceFetcher extends JPanel implements Runnable
 
   private IProgressIndicator progressIndicator;
 
+  private volatile boolean _isConstructing = false;
+
+  private List<AlignFrame> newAlframes = null;
+
   public SequenceFetcher(IProgressIndicator guiIndic)
   {
+    this(guiIndic, null, null);
+  }
+
+  public SequenceFetcher(IProgressIndicator guiIndic,
+          final String selectedDb, final String queryString)
+  {
+    this._isConstructing=true;
     this.progressIndicator = guiIndic;
     final SequenceFetcher us = this;
     // launch initialiser thread
@@ -213,7 +225,8 @@ public class SequenceFetcher extends JPanel implements Runnable
       {
         if (getSequenceFetcherSingleton(progressIndicator) != null)
         {
-          us.initGui(progressIndicator);
+          us.initGui(progressIndicator, selectedDb, queryString);
+          us._isConstructing=false;
         }
         else
         {
@@ -239,6 +252,26 @@ public class SequenceFetcher extends JPanel implements Runnable
     });
     sf.start();
   }
+  /**
+   * Blocking call which creates a new sequence fetcher panel configured with
+   * the given database and query, waits for it to finish constructing, and
+   * then calls its run() method on the calling thread.
+   * 
+   * @param database
+   *          name of the database to select in the new panel
+   * @param query
+   *          text to enter as the query
+   * @return the alignment frames recorded while run() executed, or an empty
+   *         list if the calling thread was interrupted while waiting
+   */
+  public static List<AlignFrame> fetchAndShow(String database, String query)
+  {
+    final SequenceFetcher sf = new SequenceFetcher(Desktop.instance, database, query);
+    // NOTE(review): _isConstructing looks to be cleared only when the
+    // initialiser thread succeeds - confirm the failure path also clears it,
+    // otherwise this loop never exits
+    while (sf._isConstructing)
+    {
+      try
+      {
+        Thread.sleep(50);
+      } catch (InterruptedException q)
+      {
+        // restore the interrupt status so the caller can see it
+        Thread.currentThread().interrupt();
+        return Collections.emptyList();
+      }
+    }
+    sf.newAlframes = new ArrayList<AlignFrame>();
+    sf.run();
+    return sf.newAlframes;
+  }
 
   private class DatabaseAuthority extends DefaultMutableTreeNode
   {
@@ -249,13 +282,59 @@ public class SequenceFetcher extends JPanel implements Runnable
   {
 
   };
+  
+  /**
+   * Configures this fetcher panel with a database selection and query text
+   * 
+   * @param selectedDb
+   *          - name that should correspond to a sequence fetcher source
+   * @param queryString
+   *          - text to place in the query text area
+   * @return true if a database was selected and the query text was set;
+   *         false if selectedDb is null or blank, no database is selected
+   *         after the lookup, or an exception was thrown
+   */
+  protected boolean setInitialQuery(String selectedDb, String queryString)
+  {
+    final boolean noDatabaseGiven = selectedDb == null
+            || selectedDb.trim().length() == 0;
+    if (noDatabaseGiven)
+    {
+      return false;
+    }
+    try
+    {
+      // select the first tier 0 source proxy (if any) for the database name
+      for (DbSourceProxy candidate : sfetch.getSourceProxy(selectedDb))
+      {
+        if (candidate.getTier() != 0)
+        {
+          continue;
+        }
+        database.selection = Arrays
+                .asList(new DbSourceProxy[] { candidate });
+        break;
+      }
+      final boolean nothingSelected = database.selection == null
+              || database.selection.size() == 0;
+      if (nothingSelected)
+      {
+        System.err.println("Ignoring fetch parameter db='" + selectedDb
+                + "'");
+        return false;
+      }
+      textArea.setText(queryString);
+      return true;
+    } catch (Exception q)
+    {
+      System.err.println("Ignoring fetch parameter db='" + selectedDb
+              + "' and query='" + queryString + "'");
+      return false;
+    }
+  }
 
   /**
    * called by thread spawned by constructor
    * 
    * @param guiWindow
+   * @param queryString
+   * @param selectedDb
    */
-  private void initGui(IProgressIndicator guiWindow)
+  private void initGui(IProgressIndicator guiWindow, String selectedDb,
+          String queryString)
   {
     this.guiWindow = guiWindow;
     if (guiWindow instanceof AlignFrame)
@@ -266,6 +345,16 @@ public class SequenceFetcher extends JPanel implements Runnable
     try
     {
       jbInit();
+      /*
+       * configure the UI with any query parameters we were called with
+       */
+      if (!setInitialQuery(selectedDb, queryString))
+      {
+        /*
+         * none provided, so show the database chooser
+         */
+        database.waitForInput();
+      }
     } catch (Exception ex)
     {
       ex.printStackTrace();
@@ -419,11 +508,6 @@ public class SequenceFetcher extends JPanel implements Runnable
     this.add(jPanel3, java.awt.BorderLayout.CENTER);
     this.add(jPanel2, java.awt.BorderLayout.NORTH);
     jScrollPane1.getViewport().add(textArea);
-
-    /*
-     * open the database tree
-     */
-    database.waitForInput();
   }
 
   private void pdbSourceAction()
@@ -936,7 +1020,10 @@ public class SequenceFetcher extends JPanel implements Runnable
         {
           af.hideFeatureColumns(SequenceOntologyI.EXON, false);
         }
-
+        if (newAlframes != null)
+        {
+          newAlframes.add(af);
+        }
         Desktop.addInternalFrame(af, title, AlignFrame.DEFAULT_WIDTH,
                 AlignFrame.DEFAULT_HEIGHT);
 
index 13fa460..b2cc70f 100644 (file)
@@ -867,7 +867,7 @@ public class StructureChooser extends GStructureChooser implements
       ArrayList<SequenceI> seqsWithoutSourceDBRef = new ArrayList<SequenceI>();
       for (SequenceI seq : sequences)
       {
-        if (seq.getSourceDBRef() == null && seq.getDBRefs() == null)
+        if (seq.getPrimaryDBRefs().size() == 0)
         {
             seqsWithoutSourceDBRef.add(seq);
             continue;
index fc0e207..f095383 100644 (file)
@@ -117,7 +117,9 @@ public abstract class StructureFile extends AlignFile
     DBRefEntry sourceDBRef = new DBRefEntry();
     sourceDBRef.setAccessionId(getId());
     sourceDBRef.setSource(DBRefSource.PDB);
-    pdbSequence.setSourceDBRef(sourceDBRef);
+    // TODO: specify version for 'PDB' database ref if it is read from a file.
+    // TODO: decide if jalview.io should be creating primary refs!
+    sourceDBRef.setVersion("");
     pdbSequence.addPDBId(entry);
     pdbSequence.addDBRef(sourceDBRef);
     SequenceI chainseq = pdbSequence;
index be042e6..182a48f 100644 (file)
@@ -502,7 +502,7 @@ public class StructureSelectionManager
       }
 
       ArrayList<StructureMapping> seqToStrucMapping = new ArrayList<StructureMapping>();
-      if (isMapUsingSIFTs)
+      if (isMapUsingSIFTs && seq.isProtein())
       {
         setProgressBar(null);
         setProgressBar(MessageManager
@@ -585,6 +585,20 @@ public class StructureSelectionManager
     return "cif".equalsIgnoreCase(fileExt);
   }
 
+  /**
+   * retrieve a mapping for seq from SIFTs using associated DBRefEntry for
+   * uniprot or PDB
+   * 
+   * @param seq
+   * @param pdbFile
+   * @param targetChainId
+   * @param pdb
+   * @param maxChain
+   * @param sqmpping
+   * @param maxAlignseq
+   * @return
+   * @throws SiftsException
+   */
   private StructureMapping getStructureMapping(SequenceI seq,
           String pdbFile, String targetChainId, StructureFile pdb,
           PDBChain maxChain, jalview.datamodel.Mapping sqmpping,
index d5d0cf5..405f6e6 100755 (executable)
@@ -31,6 +31,7 @@ import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import com.stevesoft.pat.Regex;
 
@@ -59,6 +60,18 @@ public class DBRefUtils
 
     canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
     canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
+    // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served
+    // from ENA.
+    canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
+    canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
+
+    // Make sure we have lowercase entries for all canonical string lookups.
+    // Iterate over a snapshot of the keys: putting a new (lowercased) key
+    // whilst iterating the live keySet view would be a structural
+    // modification, which hash-based maps reject with a
+    // ConcurrentModificationException
+    Set<String> keys = new HashSet<String>(
+            canonicalSourceNameLookup.keySet());
+    for (String k : keys)
+    {
+      canonicalSourceNameLookup.put(k.toLowerCase(),
+              canonicalSourceNameLookup.get(k));
+    }
 
     dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
     dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
@@ -235,7 +248,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() == null
-              || refb.getSource().equals(refa.getSource()))
+              || DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         if (refa.getVersion() == null
                 || refb.getVersion().equals(refa.getVersion()))
@@ -266,7 +280,7 @@ public class DBRefUtils
     @Override
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
-      if (nullOrEqual(refa.getSource(), refb.getSource())
+      if (nullOrEqualSource(refa.getSource(), refb.getSource())
               && nullOrEqual(refa.getVersion(), refb.getVersion())
               && nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
               && nullOrEqual(refa.getMap(), refb.getMap()))
@@ -288,7 +302,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
@@ -319,7 +334,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         if (refa.getAccessionId() != null && refb.getAccessionId() != null
@@ -355,7 +371,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
         // if ((refa.getVersion()==null || refb.getVersion()==null)
@@ -394,7 +411,8 @@ public class DBRefUtils
     public boolean matches(DBRefEntry refa, DBRefEntry refb)
     {
       if (refa.getSource() != null && refb.getSource() != null
-              && refb.getSource().equals(refa.getSource()))
+              && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+                      DBRefUtils.getCanonicalName(refa.getSource())))
       {
         // We dont care about version
 
@@ -521,7 +539,28 @@ public class DBRefUtils
     {
       return true;
     }
-    return (o1 == null ? o2.equals(o1) : o1.equals(o2));
+    return o1.equals(o2);
+  }
+
+  /**
+   * Compares two database source names after canonicalising each of them.
+   * A null source on either side is treated as a wildcard.
+   * 
+   * @param o1
+   *          - null or source string to compare
+   * @param o2
+   *          - null or source string to compare
+   * @return true if o1 or o2 is null, or if the strings returned by
+   *         DBRefUtils.getCanonicalName for o1 and o2 are equal
+   */
+  public static boolean nullOrEqualSource(String o1, String o2)
+  {
+    if (o1 != null && o2 != null)
+    {
+      String canonical1 = DBRefUtils.getCanonicalName(o1);
+      return canonical1.equals(DBRefUtils.getCanonicalName(o2));
+    }
+    return true;
   }
 
   /**
diff --git a/src/jalview/util/LinkedIdentityHashSet.java b/src/jalview/util/LinkedIdentityHashSet.java
new file mode 100644 (file)
index 0000000..5cdbeb1
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.util;
+
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+
+/**
+ * Order preserving Set based on System.identityHashCode() for an object, which
+ * also supports Object->index lookup. Elements are matched by reference (==)
+ * rather than by equals(). No removal operation is implemented here, so the
+ * index recorded when an element is added keeps describing its position.
+ * 
+ * @author Jim Procter (2016) based on Evgeniy Dorofeev's response: via
+ *         https://stackoverflow.com/questions/17276658/linkedidentityhashset
+ * 
+ */
+public class LinkedIdentityHashSet<E> extends AbstractSet<E>
+{
+  /*
+   * insertion-ordered store; each wrapper is both key and value so that a
+   * lookup can recover the stored wrapper (and with it the element's index)
+   */
+  LinkedHashMap<IdentityWrapper, IdentityWrapper> set = new LinkedHashMap<IdentityWrapper, IdentityWrapper>();
+
+  /**
+   * Map key that compares the wrapped objects by reference and remembers the
+   * position at which the object was added.
+   */
+  static class IdentityWrapper
+  {
+    Object obj;
+
+    public int p;
+
+    IdentityWrapper(Object obj, int p)
+    {
+      this.obj = obj;
+      this.p = p;
+    }
+
+    /**
+     * Two wrappers are equal when they wrap the same object instance. The
+     * map only ever compares wrappers with wrappers, so a comparison with
+     * the bare wrapped object would never find a match.
+     */
+    @Override
+    public boolean equals(Object other)
+    {
+      return other instanceof IdentityWrapper
+              && this.obj == ((IdentityWrapper) other).obj;
+    }
+
+    @Override
+    public int hashCode()
+    {
+      return System.identityHashCode(obj);
+    }
+  }
+
+  /**
+   * Adds e unless the same object instance is already present.
+   * 
+   * @param e
+   *          - element to add
+   * @return true if e was not already in the set
+   */
+  @Override
+  public boolean add(E e)
+  {
+    IdentityWrapper el = new IdentityWrapper(e, set.size());
+    return set.putIfAbsent(el, el) == null;
+  }
+
+  /**
+   * Identity based membership test, consistent with add and indexOf.
+   * 
+   * @param o
+   *          - object to look for
+   * @return true if this exact object instance was added to the set
+   */
+  @Override
+  public boolean contains(Object o)
+  {
+    return set.containsKey(new IdentityWrapper(o, -1));
+  }
+
+  /**
+   * @return iterator over the elements in the order they were added
+   */
+  @Override
+  public Iterator<E> iterator()
+  {
+    return new Iterator<E>()
+    {
+      final Iterator<IdentityWrapper> se = set.keySet().iterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return se.hasNext();
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public E next()
+      {
+        return (E) se.next().obj;
+      }
+    };
+  }
+
+  @Override
+  public int size()
+  {
+    return set.size();
+  }
+
+  /**
+   * Lookup the index for e in the set
+   * 
+   * @param e
+   *          - element to look up
+   * @return position of e in the set when it was added, or -1 if e is not in
+   *         the set
+   */
+  public int indexOf(E e)
+  {
+    IdentityWrapper found = set.get(new IdentityWrapper(e, -1));
+    return (found == null) ? -1 : found.p;
+  }
+}
index cae968e..dc5bee8 100644 (file)
@@ -1103,4 +1103,14 @@ public class MapList
     return forwardStrand;
   }
 
+  /**
+   * Tests whether the ratios of this map describe a 3:1 or 1:3 mapping.
+   * @return true if one of fromRatio, toRatio is 3 and the other is 1
+   */
+  public boolean isTripletMap()
+  {
+    boolean threeToOne = (fromRatio == 3) && (toRatio == 1);
+    return threeToOne || (fromRatio == 1 && toRatio == 3);
+  }
+
 }
index 3ba0e34..ca403c5 100644 (file)
@@ -176,6 +176,9 @@ public class DBRefFetcher implements Runnable
           srces.addAll(srcesfordb);
         }
       }
+      // append the PDB data source, since it is 'special', catering for both
+      // nucleotide and protein
+      srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
 
       // append the selected sequence sources to the default dbs
       srces.addAll(selsources);
@@ -622,33 +625,43 @@ public class DBRefFetcher implements Runnable
         final int sequenceStart = sequence.getStart();
         if (absStart == -1)
         {
-          // Is local sequence contained in dataset sequence?
+          // couldn't find local sequence in sequence from database, so check if
+          // the database sequence is a subsequence of local sequence
           absStart = nonGapped.indexOf(entrySeq);
           if (absStart == -1)
-          { // verification failed.
+          {
+            // verification failed. couldn't find any relationship between
+            // entrySeq and local sequence
             messages.append(sequence.getName()
                     + " SEQUENCE NOT %100 MATCH \n");
             continue;
           }
+          /*
+           * found match for the whole of the database sequence within the local
+           * sequence's reference frame. 
+           */
           transferred = true;
           sbuffer.append(sequence.getName() + " HAS " + absStart
                   + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
-          //
-          // + " - ANY SEQUENCE FEATURES"
-          // + " HAVE BEEN ADJUSTED ACCORDINGLY \n");
-          // absStart = 0;
-          // create valid mapping between matching region of local sequence and
-          // the mapped sequence
+
+          /*
+           * So create a mapping to the external entry from the matching region of 
+           * the local sequence, and leave local start/end untouched. 
+           */
           mp = new Mapping(null, new int[] { sequenceStart + absStart,
               sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
           { entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
                   1, 1);
-          updateRefFrame = false; // mapping is based on current start/end so
-          // don't modify start and end
+          updateRefFrame = false;
         }
         else
         {
+          /*
+           * found a match for the local sequence within sequence from 
+           * the external database 
+           */
           transferred = true;
+
           // update start and end of local sequence to place it in entry's
           // reference frame.
           // apply identity map map from whole of local sequence to matching
@@ -660,10 +673,14 @@ public class DBRefFetcher implements Runnable
           // absStart+sequence.getStart()+entrySeq.length()-1},
           // new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
           // relocate local features for updated start
+
           if (updateRefFrame)
           {
             if (sequence.getSequenceFeatures() != null)
             {
+              /*
+               * relocate existing sequence features by offset
+               */
               SequenceFeature[] sf = sequence.getSequenceFeatures();
               int start = sequenceStart;
               int end = sequence.getEnd();
@@ -686,7 +703,7 @@ public class DBRefFetcher implements Runnable
         System.out.println("Adding dbrefs to " + sequence.getName()
                 + " from " + dbSource + " sequence : " + entry.getName());
         sequence.transferAnnotation(entry, mp);
-        // unknownSequences.remove(sequence);
+
         absStart += entry.getStart();
         int absEnd = absStart + nonGapped.length() - 1;
         if (!trimDatasetSeqs)
index 8cc0ce4..81b4caf 100644 (file)
@@ -205,10 +205,10 @@ public class Uniprot extends DbSourceProxyImpl
     {
       DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
               accessionId);
+
+      // mark dbRef as a primary reference for this sequence
       dbRefs.add(dbRef);
     }
-    sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
-            .get(0) : null);
 
     Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
     for (PDBEntry pdb : entry.getDbReference())
index 6c94723..0ab6e7d 100644 (file)
@@ -323,41 +323,28 @@ public class SiftsClient implements SiftsClientI
   public DBRefEntryI getValidSourceDBRef(SequenceI seq)
           throws SiftsException
   {
-    DBRefEntryI sourceDBRef = null;
-    sourceDBRef = seq.getSourceDBRef();
-    if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
+    DBRefEntry[] dbRefs = seq.getDBRefs();
+    if (dbRefs == null || dbRefs.length < 1)
     {
-      return sourceDBRef;
+      throw new SiftsException(
+              "Source DBRef could not be determined. DBRefs might not have been retrieved.");
     }
-    else
+
+    for (DBRefEntryI dbRef : dbRefs)
     {
-      DBRefEntry[] dbRefs = seq.getDBRefs();
-      if (dbRefs == null || dbRefs.length < 1)
+      if (dbRef == null || dbRef.getAccessionId() == null
+              || dbRef.getSource() == null)
       {
-        throw new SiftsException(
-                "Source DBRef could not be determined. DBRefs might not have been retrieved.");
+        continue;
       }
-
-      for (DBRefEntryI dbRef : dbRefs)
+      if (isValidDBRefEntry(dbRef)
+              && dbRef.isPrimary()
+              && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
+                      .getSource().equalsIgnoreCase(DBRefSource.PDB)))
       {
-        if (dbRef == null || dbRef.getAccessionId() == null
-                || dbRef.getSource() == null)
-        {
-          continue;
-        }
-        if (isFoundInSiftsEntry(dbRef.getAccessionId())
-                && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
-                        .getSource().equalsIgnoreCase(DBRefSource.PDB)))
-        {
-          seq.setSourceDBRef(dbRef);
-          return dbRef;
-        }
+        return dbRef;
       }
     }
-    if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
-    {
-      return sourceDBRef;
-    }
     throw new SiftsException("Could not get source DB Ref");
   }
 
@@ -440,7 +427,7 @@ public class SiftsClient implements SiftsClientI
     String originalSeq = AlignSeq.extractGaps(
             jalview.util.Comparison.GapChars, seq.getSequenceAsString());
     HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
-    DBRefEntryI sourceDBRef = seq.getSourceDBRef();
+    DBRefEntryI sourceDBRef;
     sourceDBRef = getValidSourceDBRef(seq);
     // TODO ensure sequence start/end is in the same coordinate system and
     // consistent with the choosen sourceDBRef
index a856231..ddd38e7 100644 (file)
@@ -994,29 +994,44 @@ public class AlignmentUtilsTests
 
     /*
      * need a sourceDbRef if we are to construct dbrefs to the CDS
-     * sequence
+     * sequence from the dna contig sequences
      */
     DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
-    dna1.getDatasetSequence().setSourceDBRef(dbref);
+    dna1.getDatasetSequence().addDBRef(dbref);
+    org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
     dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
-    dna2.getDatasetSequence().setSourceDBRef(dbref);
+    dna2.getDatasetSequence().addDBRef(dbref);
+    org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
 
     /*
      * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
      * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
      */
-    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+    MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
             new int[] { 1, 2 }, 3, 1);
     AlignedCodonFrame acf = new AlignedCodonFrame();
-    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+            mapfordna1);
     dna.addCodonFrame(acf);
-    map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+    MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+            new int[] { 1, 3 },
             3, 1);
     acf = new AlignedCodonFrame();
-    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
+            mapfordna2);
     dna.addCodonFrame(acf);
 
     /*
+     * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+     */
+    DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
+            new Mapping(mapfordna1));
+    dna1.getDatasetSequence().addDBRef(dna1xref);
+    DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
+            new Mapping(mapfordna2));
+    dna2.getDatasetSequence().addDBRef(dna2xref);
+
+    /*
      * execute method under test:
      */
     AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
@@ -1042,11 +1057,12 @@ public class AlignmentUtilsTests
      * verify CDS has a dbref with mapping to peptide
      */
     assertNotNull(cds1Dss.getDBRefs());
-    assertEquals(1, cds1Dss.getDBRefs().length);
+    assertEquals(2, cds1Dss.getDBRefs().length);
     dbref = cds1Dss.getDBRefs()[0];
-    assertEquals("UNIPROT", dbref.getSource());
-    assertEquals("0", dbref.getVersion());
-    assertEquals("pep1", dbref.getAccessionId());
+    assertEquals(dna1xref.getSource(), dbref.getSource());
+    // version is via ensembl's primary ref
+    assertEquals(dna1xref.getVersion(), dbref.getVersion());
+    assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
     assertNotNull(dbref.getMap());
     assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
     MapList cdsMapping = new MapList(new int[] { 1, 6 },
@@ -1057,6 +1073,7 @@ public class AlignmentUtilsTests
      * verify peptide has added a dbref with reverse mapping to CDS
      */
     assertNotNull(pep1.getDBRefs());
+    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
     assertEquals(2, pep1.getDBRefs().length);
     dbref = pep1.getDBRefs()[1];
     assertEquals("ENSEMBL", dbref.getSource());
index 62bcae8..24ddb34 100644 (file)
@@ -80,12 +80,11 @@ public class CrossRefTest
      * Just the protein refs:
      */
     found = DBRefUtils.selectDbRefs(false, refs);
-    assertEquals(5, found.length);
+    assertEquals(4, found.length);
     assertSame(ref1, found[0]);
     assertSame(ref2, found[1]);
-    assertSame(ref3, found[2]);
-    assertSame(ref4, found[3]);
-    assertSame(ref9, found[4]);
+    assertSame(ref4, found[2]);
+    assertSame(ref9, found[3]);
   }
 
   /**
index b75ef50..7ad9436 100644 (file)
@@ -27,6 +27,7 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.FormatAdapter;
 import jalview.util.MapList;
@@ -37,6 +38,7 @@ import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
+import org.testng.Assert;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
@@ -101,6 +103,460 @@ public class AlignmentTest
     return a;
   }
 
+  /**
+   * assert wrapper: runs verifyAlignmentDatasetRefs with raiseAssert enabled
+   * and no message prefix, so any inconsistency found fails the calling test
+   * @param alignment - alignment (or dataset) whose references are checked
+   */
+  public static void assertAlignmentDatasetRefs(AlignmentI alignment)
+  {
+    verifyAlignmentDatasetRefs(alignment, true, null);
+  }
+
+  /**
+   * assert wrapper: runs verifyAlignmentDatasetRefs with raiseAssert enabled,
+   * so any inconsistency found in the alignment fails the calling test
+   * @param alignment - alignment (or dataset) whose references are checked
+   * @param message
+   *          - prefixed to any assert failed messages (may be null)
+   */
+  public static void assertAlignmentDatasetRefs(AlignmentI alignment,
+          String message)
+  {
+    verifyAlignmentDatasetRefs(alignment, true, message);
+  }
+
+  /**
+   * verify sequence and dataset references are properly contained within
+   * dataset
+   * 
+   * @param alignment
+   *          - the alignmentI object to verify (either alignment or dataset)
+   * @param raiseAssert
+   *          - when set, testng assertions are raised.
+   * @param message
+   *          - null or a string message to prepend to the assert failed messages.
+   * @return true if alignment references were in order, otherwise false.
+   */
+  public static boolean verifyAlignmentDatasetRefs(AlignmentI alignment,
+          boolean raiseAssert, String message)
+  {
+    if (message==null) { message = ""; }
+    if (alignment == null)
+    {
+      if (raiseAssert)
+      {
+        Assert.fail(message+"Alignment for verification was null.");
+      }
+      return false;
+    }
+    if (alignment.getDataset() != null)
+    {
+      AlignmentI dataset = alignment.getDataset();
+      // check all alignment sequences have their dataset within the dataset
+      for (SequenceI seq : alignment.getSequences())
+      {
+        SequenceI seqds = seq.getDatasetSequence();
+        if (seqds.getDatasetSequence() != null)
+        {
+          if (raiseAssert)
+          {
+            Assert.fail(message+" Alignment contained a sequence who's dataset sequence has a second dataset reference.");
+          }
+          return false;
+        }
+        if (dataset.findIndex(seqds) == -1)
+        {
+          if (raiseAssert)
+          {
+            Assert.fail(message+" Alignment contained a sequence who's dataset sequence was not in the dataset.");
+          }
+          return false;
+        }
+      }
+      return verifyAlignmentDatasetRefs(alignment.getDataset(), raiseAssert, message);
+    }
+    else
+    {
+      int dsp = -1;
+      // verify all dataset sequences
+      for (SequenceI seqds : alignment.getSequences())
+      {
+        dsp++;
+        if (seqds.getDatasetSequence() != null)
+        {
+          if (raiseAssert)
+          {
+            Assert.fail(message+" Dataset contained a sequence with non-null dataset reference (ie not a dataset sequence!)");
+          }
+          return false;
+        }
+        int foundp = alignment.findIndex(seqds);
+        if (foundp != dsp)
+        {
+          if (raiseAssert)
+          {
+            Assert.fail(message
+                    + " Dataset sequence array contains a reference at "
+                    + dsp + " to a sequence first seen at " + foundp + " ("
+                    + seqds.toString() + ")");
+          }
+          return false;
+        }
+        if (seqds.getDBRefs() != null)
+        {
+          for (DBRefEntry dbr : seqds.getDBRefs())
+          {
+            if (dbr.getMap() != null)
+            {
+              SequenceI seqdbrmapto = dbr.getMap().getTo();
+              if (seqdbrmapto != null)
+              {
+                if (seqdbrmapto.getDatasetSequence() != null)
+                {
+                  if (raiseAssert)
+                  {
+                    Assert.fail(message+" DBRefEntry for sequence in alignment had map to sequence which was not a dataset sequence");
+                  }
+                  return false;
+
+                }
+                if (alignment.findIndex(dbr.getMap().getTo()) == -1)
+                {
+                  if (raiseAssert)
+                  {
+                    Assert.fail(message+" DBRefEntry for sequence in alignment had map to sequence not in dataset");
+                  }
+                  return false;
+                }
+              }
+            }
+          }
+        }
+      }
+      // finally, verify codonmappings involve only dataset sequences.
+      if (alignment.getCodonFrames() != null)
+      {
+        for (AlignedCodonFrame alc : alignment.getCodonFrames())
+        {
+          for (SequenceToSequenceMapping ssm : alc.getMappings())
+          {
+            if (ssm.getFromSeq().getDatasetSequence() != null)
+            {
+              if (raiseAssert)
+              {
+                Assert.fail(message+" CodonFrame-SSM-FromSeq is not a dataset sequence");
+              }
+              return false;
+            }
+            if (alignment.findIndex(ssm.getFromSeq()) == -1)
+            {
+
+              if (raiseAssert)
+              {
+                Assert.fail(message+" CodonFrame-SSM-FromSeq is not contained in dataset");
+              }
+              return false;
+            }
+            if (ssm.getMapping().getTo().getDatasetSequence() != null)
+            {
+              if (raiseAssert)
+              {
+                Assert.fail(message+" CodonFrame-SSM-Mapping-ToSeq is not a dataset sequence");
+              }
+              return false;
+            }
+            if (alignment.findIndex(ssm.getMapping().getTo()) == -1)
+            {
+
+              if (raiseAssert)
+              {
+                Assert.fail(message+" CodonFrame-SSM-Mapping-ToSeq is not contained in dataset");
+              }
+              return false;
+            }
+          }
+        }
+      }
+    }
+    return true; // all relationships verified!
+  }
+
+  /**
+   * call verifyAlignmentDatasetRefs with and without assertion raising enabled,
+   * to check expected pass/fail actually occurs in both conditions
+   * 
+   * @param al - alignment to verify
+   * @param expected - true if al should verify, false if it should be rejected
+   * @param msg - appended to the message of any assertion that fails
+   */
+  private void assertVerifyAlignment(AlignmentI al, boolean expected,
+          String msg)
+  {
+    if (expected)
+    {
+      try
+      {
+
+        Assert.assertTrue(verifyAlignmentDatasetRefs(al, true, null),
+                "Valid test alignment failed when raiseAsserts enabled:"
+                        + msg);
+      } catch (AssertionError ae)
+      {
+        ae.printStackTrace();
+        Assert.fail(
+                "Valid test alignment raised assertion errors when raiseAsserts enabled: "
+                        + msg, ae);
+      }
+      // also check validation passes with asserts disabled
+      Assert.assertTrue(verifyAlignmentDatasetRefs(al, false, null),
+              "Valid test alignment tested false when raiseAsserts disabled:"
+                      + msg);
+    }
+    else
+    {
+      boolean assertRaised = false;
+      try
+      {
+        verifyAlignmentDatasetRefs(al, true, null);
+      } catch (AssertionError ae)
+      {
+        // expected behaviour: the invalid alignment raised an assertion
+        assertRaised = true;
+      }
+      if (!assertRaised)
+      {
+        Assert.fail("Invalid test alignment passed when raiseAsserts enabled:"
+                + msg);
+      }
+      // with asserts disabled an invalid alignment must just return false
+      Assert.assertFalse(verifyAlignmentDatasetRefs(al, false, null),
+              "Invalid test alignment tested true when raiseAsserts disabled:"
+                      + msg);
+    }
+  }
+  @Test(groups = { "Functional" })
+  public void testVerifyAlignmentDatasetRefs()
+  {
+    SequenceI sq1 = new Sequence("sq1", "ASFDD"), sq2 = new Sequence("sq2",
+            "TTTTTT");
+
+    // construct simple valid alignment dataset
+    Alignment al = new Alignment(new SequenceI[] {
+        sq1, sq2 });
+    // expect this to pass
+    assertVerifyAlignment(al, true, "Simple valid alignment didn't verify");
+
+    // check test for sequence->datasetSequence validity
+    sq1.setDatasetSequence(sq2);
+    assertVerifyAlignment(
+            al,
+            false,
+            "didn't detect dataset sequence with a dataset sequence reference.");
+
+    sq1.setDatasetSequence(null);
+    assertVerifyAlignment(
+            al,
+            true,
+            "didn't reinstate validity after nulling dataset sequence dataset reference");
+
+    // now create dataset and check again
+    al.createDatasetAlignment();
+    assertNotNull(al.getDataset());
+
+    assertVerifyAlignment(al, true,
+            "verify failed after createDatasetAlignment");
+
+    // create a dbref on sq1 with a sequence ref to sq2
+    DBRefEntry dbrs1tos2 = new DBRefEntry("UNIPROT", "1", "Q111111");
+    dbrs1tos2.setMap(new Mapping(sq2.getDatasetSequence(),
+            new int[] { 1, 5 }, new int[] { 2, 6 }, 1, 1));
+    sq1.getDatasetSequence().addDBRef(dbrs1tos2);
+    assertVerifyAlignment(al, true,
+            "verify failed after addition of valid DBRefEntry/map");
+    // now create a dbref on a new sequence which maps to another sequence
+    // outside of the dataset
+    SequenceI sqout = new Sequence("sqout", "ututututucagcagcag"), sqnew = new Sequence(
+            "sqnew", "EEERRR");
+    DBRefEntry sqnewsqout = new DBRefEntry("ENAFOO", "1", "R000001");
+    sqnewsqout.setMap(new Mapping(sqout, new int[] { 1, 6 }, new int[] { 1,
+        18 }, 1, 3));
+    al.getDataset().addSequence(sqnew);
+
+    assertVerifyAlignment(al, true,
+            "verify failed after addition of new sequence to dataset");
+    // now start checking exception conditions
+    sqnew.addDBRef(sqnewsqout);
+    assertVerifyAlignment(
+            al,
+            false,
+            "verify passed when a dbref with map to sequence outside of dataset was added");
+    // make the verify pass by adding the outsider back in
+    al.getDataset().addSequence(sqout);
+    assertVerifyAlignment(al, true,
+            "verify should have passed after adding dbref->to sequence in to dataset");
+    // and now the same for a codon mapping...
+    SequenceI sqanotherout = new Sequence("sqanotherout",
+            "aggtutaggcagcagcag");
+
+    AlignedCodonFrame alc = new AlignedCodonFrame();
+    alc.addMap(sqanotherout, sqnew, new MapList(new int[] { 1, 6 },
+            new int[] { 1, 18 }, 3, 1));
+
+    al.addCodonFrame(alc);
+    Assert.assertEquals(al.getDataset().getCodonFrames().size(), 1);
+
+    assertVerifyAlignment(
+            al,
+            false,
+            "verify passed when alCodonFrame mapping to sequence outside of dataset was added");
+    // make the verify pass by adding the outsider back in
+    al.getDataset().addSequence(sqanotherout);
+    assertVerifyAlignment(
+            al,
+            true,
+            "verify should have passed once all sequences involved in alCodonFrame were added to dataset");
+    al.getDataset().addSequence(sqanotherout);
+    assertVerifyAlignment(al, false,
+            "verify should have failed when a sequence was added twice to the dataset");
+    al.getDataset().deleteSequence(sqanotherout);
+    assertVerifyAlignment(al, true,
+            "verify should have passed after duplicate entry for sequence was removed");
+  }
+
+  /**
+   * checks that the sequence data for an alignment's dataset is non-redundant.
+   * Fails if there are sequences with same id, sequence, start, and end.
+   */
+
+  public static void assertDatasetIsNormalised(AlignmentI al)
+  {
+    assertDatasetIsNormalised(al, null);
+  }
+
+  /**
+   * checks that the sequence data for an alignment's dataset is non-redundant.
+   * Fails if there are sequences with same id, sequence, start, and end.
+   * 
+   * @param al
+   *          - alignment to verify; if it has a dataset, that is checked instead
+   * @param message
+   *          - null or message prepended to exception message.
+   */
+  public static void assertDatasetIsNormalised(AlignmentI al, String message)
+  {
+    if (al.getDataset()!=null)
+    {
+      assertDatasetIsNormalised(al.getDataset(), message);
+      return;
+    }
+    /*
+     * fail on the first pair with identical start, end, name and sequence
+     */
+    List<SequenceI> seqSet = al.getSequences();
+    for (int p=0;p<seqSet.size(); p++)
+    {
+      SequenceI pSeq = seqSet.get(p);
+      for (int q=p+1; q<seqSet.size(); q++)
+      {
+        SequenceI qSeq = seqSet.get(q);
+        if (pSeq.getStart()!=qSeq.getStart())
+        {
+          continue;
+        }
+        if (pSeq.getEnd()!=qSeq.getEnd())
+        {
+          continue;
+        }
+        if (!pSeq.getName().equals(qSeq.getName()))
+        {
+          continue;
+        }
+        if (!Arrays.equals(pSeq.getSequence(), qSeq.getSequence()))
+        {
+          continue;
+        }
+        Assert.fail((message == null ? "" : message + " :")
+                + "Found similar sequences at position " + p + " and " + q
+                + "\n" + pSeq.toString());
+      }
+    }
+  }
+  
+  @Test(groups = { "Functional", "Asserts" })
+  public void testAssertDatasetIsNormalised()
+  {
+    Sequence sq1 = new Sequence("s1/1-4", "asdf");
+    Sequence sq1shift = new Sequence("s1/2-5", "asdf");
+    Sequence sq1seqd = new Sequence("s1/1-4", "asdt");
+    Sequence sq2 = new Sequence("s2/1-4", "asdf");
+    Sequence sq1dup = new Sequence("s1/1-4", "asdf");
+
+    Alignment al = new Alignment(new SequenceI[] { sq1 });
+    al.setDataset(null);
+
+    try
+    {
+      assertDatasetIsNormalised(al);
+    } catch (AssertionError ae)
+    {
+      Assert.fail("Single sequence should be valid normalised dataset.");
+    }
+    al.addSequence(sq2);
+    try
+    {
+      assertDatasetIsNormalised(al);
+    } catch (AssertionError ae)
+    {
+      Assert.fail("Two different sequences should be valid normalised dataset.");
+    }
+    /*
+     * now change sq2's name in the alignment. should still be valid
+     */
+    al.findName(sq2.getName()).setName("sq1");
+    try
+    {
+      assertDatasetIsNormalised(al);
+    } catch (AssertionError ae)
+    {
+      Assert.fail("Two different sequences in dataset, but same name in alignment, should be valid normalised dataset.");
+    }
+
+    al.addSequence(sq1seqd);
+    try
+    {
+      assertDatasetIsNormalised(al);
+    } catch (AssertionError ae)
+    {
+      Assert.fail("sq1 and sq1 with different sequence should be distinct.");
+    }
+
+    al.addSequence(sq1shift);
+    try
+    {
+      assertDatasetIsNormalised(al);
+    } catch (AssertionError ae)
+    {
+      Assert.fail("sq1 and sq1 with different start/end should be distinct.");
+    }
+    /*
+     * finally, the failure case
+     */
+    al.addSequence(sq1dup);
+    boolean ssertRaised = false;
+    try
+    {
+      assertDatasetIsNormalised(al);
+
+    } catch (AssertionError ae)
+    {
+      ssertRaised = true;
+    }
+    if (!ssertRaised)
+    {
+      Assert.fail("Expected identical sequence to raise exception.");
+    }
+  }
   /*
    * Read in Stockholm format test data including secondary structure
    * annotations.
@@ -460,6 +916,60 @@ public class AlignmentTest
     assertTrue(ds.getCodonFrames().contains(acf));
   }
 
+  /**
+   * tests the addition of *all* sequences referred to by a sequence being added
+   * to the dataset
+   */
+  @Test(groups = "Functional")
+  public void testCreateDatasetAlignmentWithMappedToSeqs()
+  {
+    // Alignment with two sequences, gapped.
+    SequenceI sq1 = new Sequence("sq1", "A--SDF");
+    SequenceI sq2 = new Sequence("sq2", "G--TRQ");
+
+    // cross-references to two more sequences.
+    DBRefEntry dbr = new DBRefEntry("SQ1", "", "sq3");
+    SequenceI sq3 = new Sequence("sq3", "VWANG");
+    dbr.setMap(new Mapping(sq3, new MapList(new int[] { 1, 4 }, new int[] {
+        2, 5 }, 1, 1)));
+    sq1.addDBRef(dbr);
+
+    SequenceI sq4 = new Sequence("sq4", "ERKWI");
+    DBRefEntry dbr2 = new DBRefEntry("SQ2", "", "sq4");
+    dbr2.setMap(new Mapping(sq4, new MapList(new int[] { 1, 4 }, new int[] {
+        2, 5 }, 1, 1)));
+    sq2.addDBRef(dbr2);
+    // a 1:1 codonframe mapping between them (NOTE: alc is never added to the alignment)
+    AlignedCodonFrame alc = new AlignedCodonFrame();
+    alc.addMap(sq1, sq2, new MapList(new int[] { 1, 4 },
+            new int[] { 1, 4 }, 1, 1));
+
+    AlignmentI protein = new Alignment(new SequenceI[] { sq1, sq2 });
+
+    /*
+     * create the alignment dataset
+     * note this creates sequence datasets where missing
+     * as a side-effect (in this case, on seq2)
+     */
+
+    // TODO promote this method to AlignmentI
+    ((Alignment) protein).createDatasetAlignment();
+
+    AlignmentI ds = protein.getDataset();
+
+    // should be 4 sequences in dataset - two materialised, and two propagated
+    // from dbref
+    assertEquals(4, ds.getHeight());
+    assertTrue(ds.getSequences().contains(sq1.getDatasetSequence()));
+    assertTrue(ds.getSequences().contains(sq2.getDatasetSequence()));
+    assertTrue(ds.getSequences().contains(sq3));
+    assertTrue(ds.getSequences().contains(sq4));
+    // Should have one codon frame mapping between sq1 and sq2 via dataset
+    // sequences
+    assertEquals(ds.getCodonFrame(sq1.getDatasetSequence()),
+            ds.getCodonFrame(sq2.getDatasetSequence()));
+  }
+
   @Test(groups = "Functional")
   public void testAddCodonFrame()
   {
@@ -483,6 +993,27 @@ public class AlignmentTest
   }
 
   @Test(groups = "Functional")
+  public void testAddSequencePreserveDatasetIntegrity()
+  {
+    Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+    Alignment align = new Alignment(new SequenceI[] { seq });
+    align.createDatasetAlignment(); // dataset must exist before further sequences are added
+    AlignmentI ds = align.getDataset();
+    SequenceI copy = new Sequence(seq); // copy-constructed from seq
+    copy.insertCharAt(3, 5, '-'); // gap the copy so its aligned form differs from seq
+    align.addSequence(copy);
+    Assert.assertEquals(align.getDataset().getHeight(), 1,
+            "Dataset shouldn't have more than one sequence.");
+
+    // an independently constructed sequence is expected to add a dataset entry
+    Sequence seq2 = new Sequence("newtestSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+    align.addSequence(seq2);
+    Assert.assertEquals(align.getDataset().getHeight(), 2,
+            "Dataset should now have two sequences.");
+
+    assertAlignmentDatasetRefs(align,
+            "addSequence broke dataset reference integrity");
+  }
+  @Test(groups = "Functional")
   public void getVisibleStartAndEndIndexTest()
   {
     Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
@@ -510,4 +1041,5 @@ public class AlignmentTest
     assertEquals(1, startEnd[0]);
     assertEquals(23, startEnd[1]);
   }
+
 }
index ae6dcda..09d9df1 100644 (file)
@@ -138,4 +138,62 @@ public class DBRefEntryTest
     assertFalse(ref1.updateFrom(ref2));
     assertEquals("10", ref1.getVersion());
   }
+
+  @Test(groups = { "Functional" })
+  public void testIsPrimary()
+  {
+    DBRefEntry dbr = new DBRefEntry(DBRefSource.UNIPROT, "", "Q12345");
+    assertTrue(dbr.isPrimary());
+    /*
+     *  1:1 mapping
+     */
+    dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+            1));
+    assertTrue(dbr.isPrimary());
+    /*
+     * Version string is prefixed with another dbref source string (fail)
+     */
+    dbr.setVersion(DBRefSource.EMBL + ":0");
+    assertFalse(dbr.isPrimary());
+
+    /*
+     * Version string is alphanumeric
+     */
+    dbr.setVersion("0.1.b");
+    assertTrue(dbr.isPrimary());
+
+    /*
+     *  1:1 mapping with shift (fail)
+     */
+    dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 2, 4 }, 1,
+            1));
+    assertFalse(dbr.isPrimary());
+
+    /*
+     *  1:1 mapping and sequenceRef (fail)
+     */
+    dbr.setMap(new Mapping(new Sequence("foo", "ASDF"), new int[] { 1, 3 },
+            new int[] { 1, 3 }, 1, 1));
+    assertFalse(dbr.isPrimary());
+
+    /*
+     * 1:3 mapping (fail)
+     */
+    dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+            3));
+    assertFalse(dbr.isPrimary());
+    /*
+     * 2:2 mapping, identical ranges (expected fail, but maybe use case for a pass)
+     */
+    dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 2,
+            2));
+    assertFalse(dbr.isPrimary());
+
+    /*
+     * Version string is prefixed with another dbref source string (fail, repeated with the 2:2 map set)
+     */
+    dbr.setVersion(DBRefSource.EMBL + ":0");
+    assertFalse(dbr.isPrimary());
+
+  }
 }
index cfc4cbb..3ad309e 100644 (file)
@@ -438,36 +438,56 @@ public class SequenceTest
 
     sq.setDescription("Test sequence description..");
     sq.setVamsasId("TestVamsasId");
-    sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST"));
+    sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
 
-    sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst"));
-    sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst"));
-    sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst"));
-    sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst"));
+    sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
+    sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
 
     sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
     sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
     sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
     sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+    
+    DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
+    DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
 
+    //FIXME pdb2pdb's matching PDBEntry has Type.MMCIF - but 2.10 only has PDBEntry with type==PDB to indicate ID is a real PDB entry
+    
+    List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb });
+
+    sq.getDatasetSequence().addDBRef(pdb1pdb);
+    sq.getDatasetSequence().addDBRef(pdb2pdb);
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version1", "1Tst"));
-    sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version2", "2Tst"));
-    sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version3", "3Tst"));
+            new DBRefEntry("PDB", "version3", "3PDB"));
     sq.getDatasetSequence().addDBRef(
-            new DBRefEntry("PDB", "version4", "4Tst"));
-
-    sq.getDatasetSequence().addPDBId(
-            new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
-    sq.getDatasetSequence().addPDBId(
-            new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
+            new DBRefEntry("PDB", "version4", "4PDB"));
+    
+    PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
+    PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
+    PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2");
+    PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2");
     sq.getDatasetSequence().addPDBId(
-            new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
+            pdbe1a);
     sq.getDatasetSequence().addPDBId(
-            new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+            pdbe1b);
+    sq.getDatasetSequence().addPDBId(pdbe2a);
+    sq.getDatasetSequence().addPDBId(pdbe2b);
+
+    /*
+     * test we added pdb entries to the dataset sequence
+     */
+    Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
+            .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
+            "PDB Entries were not found on dataset sequence.");
 
+    /*
+     * we should recover a pdb entry that is on the dataset sequence via PDBEntry
+     */
+    Assert.assertEquals(pdbe1a,
+            sq.getDatasetSequence().getPDBEntry("1PDB"),
+            "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
     ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
     System.out.println(">>>>>> " + sq.getSequenceAsString().length());
     annotsList.add(new Annotation("A", "A", 'X', 0.1f));
@@ -479,7 +499,7 @@ public class SequenceTest
             new AlignmentAnnotation("Test annot", "Test annot description",
                     annots));
     Assert.assertEquals(sq.getDescription(), "Test sequence description..");
-    Assert.assertEquals(sq.getDBRefs().length, 4);
+    Assert.assertEquals(sq.getDBRefs().length, 5);
     Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(sq.getAnnotation());
     Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
@@ -492,7 +512,7 @@ public class SequenceTest
 
     Assert.assertEquals(derived.getDescription(),
             "Test sequence description..");
-    Assert.assertEquals(derived.getDBRefs().length, 4);
+    Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
     Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
     Assert.assertNotNull(derived.getAnnotation());
     Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
@@ -510,6 +530,17 @@ public class SequenceTest
     assertNotNull(sq.getSequenceFeatures());
     assertArrayEquals(sq.getSequenceFeatures(),
             derived.getSequenceFeatures());
+    
+    /*
+     *  verify we have primary db refs *just* for PDB IDs with associated
+     *  PDBEntry objects
+     */
+
+    assertEquals(primRefs, sq.getPrimaryDBRefs());
+    assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+    assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
   }
 
   /**
@@ -734,4 +765,30 @@ public class SequenceTest
     assertSame(dbref3, sq.getDBRefs()[2]);
     assertEquals("3", dbref2.getVersion());
   }
+
+  @Test(groups = { "Functional" })
+  public void testGetPrimaryDBRefs()
+  {
+    /*
+     * test PDB relationships for getPrimaryDBRefs
+     */
+    SequenceI seq = new Sequence("aseq", "ASDF");
+    DBRefEntry upentry = new DBRefEntry("UNIPROT", "0", "1qip");
+    // primary - uniprot
+    seq.addDBRef(upentry);
+    // primary - type is PDB (the PDBEntry added below uses id "1QIP", differing only in case)
+    DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
+    seq.addDBRef(pdbentry);
+    // not primary - type of PDBEntry is not PDB (it is added below with a null type)
+    seq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
+    // not primary - no PDBEntry
+    seq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
+    seq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, null));
+    seq.addPDBId(new PDBEntry("1AAA", null, null, null));
+    assertTrue("Couldn't find simple primary reference (UNIPROT)", seq
+            .getPrimaryDBRefs().contains(upentry));
+    assertTrue("Couldn't find expected PDB primary reference", seq
+            .getPrimaryDBRefs().contains(pdbentry));
+    assertEquals(2, seq.getPrimaryDBRefs().size());
+  }
 }
index 4b71417..abe5099 100644 (file)
@@ -128,6 +128,7 @@ public class EmblEntryTest
     assertEquals(5, dbrefs.length);
     assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
     assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
+    // TODO: verify getPrimaryDBRefs() for peptide products
     assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
     assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
     assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
index 1dc9b8d..4dc8ab2 100644 (file)
@@ -1,6 +1,7 @@
 package jalview.ext.ensembl;
 
 import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
 
 import jalview.datamodel.DBRefEntry;
 
@@ -24,8 +25,11 @@ public class EnsemblXrefTest
   @Test(groups = "Functional")
   public void testGetCrossReferences()
   {
+    String dbName = "ENSEMBL";
+    String dbVers = "0.6.2b1";
     System.out.println(JSON);
-    EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org")
+    EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org", dbName,
+            dbVers)
     {
       @Override
       protected BufferedReader getHttpResponse(URL url, List<String> ids)
@@ -40,8 +44,12 @@ public class EnsemblXrefTest
     assertEquals(2, dbrefs.size());
     assertEquals("CCDS", dbrefs.get(0).getSource());
     assertEquals("CCDS5863", dbrefs.get(0).getAccessionId());
+    assertFalse(dbrefs.get(0).isPrimary());
+    assertEquals(dbName + ":" + dbVers, dbrefs.get(0).getVersion());
     // Uniprot name should get converted to Jalview canonical form
     assertEquals("UNIPROT", dbrefs.get(1).getSource());
     assertEquals("P15056", dbrefs.get(1).getAccessionId());
+    assertEquals(dbName + ":" + dbVers, dbrefs.get(1).getVersion());
+    assertFalse(dbrefs.get(1).isPrimary());
   }
 }
diff --git a/test/jalview/io/CrossRef2xmlTests.java b/test/jalview/io/CrossRef2xmlTests.java
new file mode 100644 (file)
index 0000000..2063c88
--- /dev/null
@@ -0,0 +1,575 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.analysis.CrossRef;
+import jalview.api.AlignmentViewPanel;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentTest;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.CrossRefAction;
+import jalview.gui.Desktop;
+import jalview.gui.Jalview2XML;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+@Test(singleThreaded = true)
+public class CrossRef2xmlTests extends Jalview2xmlBase
+{
+
+  /**
+   * test store and recovery of all reachable cross refs from all reachable
+   * crossrefs for one or more fetched db refs. Currently, this test has a known
+   * failure case.
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Operational" }, enabled = true)
+  public void testRetrieveAndShowCrossref() throws Exception
+  {
+
+    List<String> failedDBRetr = new ArrayList<String>();
+    List<String> failedXrefMenuItems = new ArrayList<String>();
+    List<String> failedProjectRecoveries = new ArrayList<String>();
+
+    // for every set of db queries
+    // retrieve db query
+    // verify presence of expected xrefs
+    // show xrefs - verify expected type of frame is shown for each xref
+    // show xrefs again
+    // - verify original -> xref -> xref(original) recovers frame containing at
+    // least the first retrieved sequence
+    // store
+    // 1. whole project
+    // 2. individual frames
+    // 3. load each one back and verify
+    // . aligned sequences (.toString() )
+    // . xrefs (.toString() )
+    // . codonframes
+    //
+    //
+    HashMap<String, String> dbtoviewBit = new HashMap<String, String>();
+    List<String> keyseq = new ArrayList<String>();
+    HashMap<String, File> savedProjects = new HashMap<String, File>();
+
+    for (String[] did : new String[][] { { "ENSEMBL", "ENSG00000157764" },
+    { "UNIPROT", "P01731" } })
+    {
+      // pass counters - 0 - first pass, 1 means retrieve project rather than
+      // perform action
+      int pass1 = 0, pass2 = 0, pass3 = 0;
+      // each do loop performs two iterations in the first outer loop pass, but
+      // only performs one iteration on the second outer loop
+      // ie. pass 1 = 0 {pass 2= 0 { pass 3 = 0,1 }, pass 2=1 { pass 3 = 0 }}, 1
+      // { pass 2 = 0 { pass 3 = 0 } }
+      do
+      {
+        String first = did[0] + " " + did[1];
+        AlignFrame af = null;
+        boolean dna;
+        AlignmentI retral;
+        AlignmentI dataset;
+        SequenceI[] seqs;
+        List<String> ptypes = null;
+        if (pass1 == 0)
+        {
+          // retrieve dbref
+
+          List<AlignFrame> afs = jalview.gui.SequenceFetcher.fetchAndShow(
+                  did[0], did[1]);
+          if (afs.size() == 0)
+          {
+            failedDBRetr.add("Didn't retrieve " + first);
+            break;
+          }
+          keyseq.add(first);
+          af = afs.get(0);
+
+          // verify references for retrieved data
+          AlignmentTest.assertAlignmentDatasetRefs(af.getViewport()
+                  .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+                  + pass3 + "): Fetch " + first + ":");
+          assertDatasetIsNormalisedKnownDefect(af.getViewport()
+                  .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+                  + pass3 + "): Fetch " + first + ":");
+          dna = af.getViewport().getAlignment().isNucleotide();
+          retral = af.getViewport().getAlignment();
+          dataset = retral.getDataset();
+          seqs = retral.getSequencesArray();
+
+        }
+        else
+        {
+          Desktop.instance.closeAll_actionPerformed(null);
+          // recover stored project
+          af = new FileLoader(false).LoadFileWaitTillLoaded(savedProjects
+                  .get(first).toString(), FormatAdapter.FILE);
+          System.out.println("Recovered view for '" + first + "' from '"
+                  + savedProjects.get(first).toString() + "'");
+          dna = af.getViewport().getAlignment().isNucleotide();
+          retral = af.getViewport().getAlignment();
+          dataset = retral.getDataset();
+          seqs = retral.getSequencesArray();
+
+          // verify references for recovered data
+          AlignmentTest.assertAlignmentDatasetRefs(af.getViewport()
+                  .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+                  + pass3 + "): Recover " + first + ":");
+          assertDatasetIsNormalisedKnownDefect(af.getViewport()
+                  .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+                  + pass3 + "): Recover " + first + ":");
+
+        }
+
+        // store project on first pass, compare next pass
+        stringify(dbtoviewBit, savedProjects, first, af.alignPanel);
+
+        ptypes = (seqs == null || seqs.length == 0) ? null : new CrossRef(
+                seqs, dataset).findXrefSourcesForSequences(dna);
+
+        // start of pass2: retrieve each cross-ref for fetched or restored
+        // project.
+        do // first cross ref and recover crossref loop
+        {
+
+          for (String db : ptypes)
+          {
+            // counter for splitframe views retrieved via crossref
+            int firstcr_ap = 0;
+            // build next key so we an retrieve all views
+            String nextxref = first + " -> " + db + "{" + firstcr_ap + "}";
+            // perform crossref action, or retrieve stored project
+            List<AlignmentViewPanel> cra_views = new ArrayList<AlignmentViewPanel>();
+            CrossRefAction cra = null;
+            
+            if (pass2 == 0)
+            { // retrieve and show cross-refs in this thread
+              cra = new CrossRefAction(af, seqs, dna, db);
+              cra.run();
+              if (cra.getXrefViews().size() == 0)
+              {
+                failedXrefMenuItems.add("No crossrefs retrieved for "
+                        + first + " -> " + db);
+                continue;
+              }
+              cra_views = cra.getXrefViews();
+              assertNucleotide(cra_views.get(0),
+                      "Nucleotide panel included proteins for " + first
+                              + " -> " + db);
+              assertProtein(cra_views.get(1),
+                      "Protein panel included nucleotides for " + first
+                              + " -> " + db);
+            }
+            else
+            {
+              Desktop.instance.closeAll_actionPerformed(null);
+              pass3 = 0;
+              // recover stored project
+              File storedProject = savedProjects.get(nextxref);
+              if (storedProject == null)
+              {
+                failedProjectRecoveries.add("Failed to store a view for '"
+                        + nextxref + "'");
+                continue;
+              }
+
+              // recover stored project
+              AlignFrame af2 = new FileLoader(false)
+                      .LoadFileWaitTillLoaded(savedProjects.get(nextxref)
+                              .toString(), FormatAdapter.FILE);
+              System.out.println("Recovered view for '" + nextxref
+                      + "' from '" + savedProjects.get(nextxref).toString()
+                      + "'");
+              // gymnastics to recover the alignPanel/Complementary alignPanel
+              if (af2.getViewport().isNucleotide())
+              {
+                // top view, then bottom
+                cra_views.add(af2.getViewport().getAlignPanel());
+                cra_views.add(((jalview.gui.AlignViewport) af2
+                        .getViewport().getCodingComplement())
+                        .getAlignPanel());
+
+              }
+              else
+              {
+                // bottom view, then top
+                cra_views.add(((jalview.gui.AlignViewport) af2
+                        .getViewport().getCodingComplement())
+                        .getAlignPanel());
+                cra_views.add(af2.getViewport().getAlignPanel());
+
+              }
+            }
+            HashMap<String, List<String>> xrptypes = new HashMap<String, List<String>>();
+            // first save/verify views.
+            for (AlignmentViewPanel avp : cra_views)
+            {
+              nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}";
+              // verify references for this panel
+              AlignmentTest.assertAlignmentDatasetRefs(avp.getAlignment(),
+                      "Pass (" + pass1 + "," + pass2 + "," + pass3
+                              + "): before start of pass3: " + nextxref
+                              + ":");
+              assertDatasetIsNormalisedKnownDefect(avp.getAlignment(),
+                      "Pass (" + pass1 + "," + pass2 + "," + pass3
+                              + "): before start of pass3: " + nextxref
+                              + ":");
+
+              SequenceI[] xrseqs = avp.getAlignment().getSequencesArray();
+
+              List<String> _xrptypes = (seqs == null || seqs.length == 0) ? null
+                      : new CrossRef(xrseqs, dataset)
+                              .findXrefSourcesForSequences(avp
+                                      .getAlignViewport().isNucleotide());
+              
+              stringify(dbtoviewBit, savedProjects, nextxref, avp);
+              xrptypes.put(nextxref, _xrptypes);
+
+            }
+
+            // now do the second xref pass starting from either saved or just
+            // recovered split pane, in sequence
+            do // retrieve second set of cross refs or recover and verify
+            {
+              firstcr_ap = 0;
+              for (AlignmentViewPanel avp : cra_views)
+              {
+                nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}";
+                for (String xrefdb : xrptypes.get(nextxref))
+                {
+                  List<AlignmentViewPanel> cra_views2 = new ArrayList<AlignmentViewPanel>();
+                  int q = 0;
+                  String nextnextxref = nextxref
+                          + " -> " + xrefdb + "{" + q + "}";
+
+                  if (pass3 == 0)
+                  {
+
+                    SequenceI[] xrseqs = avp.getAlignment()
+                            .getSequencesArray();
+                    AlignFrame nextaf = Desktop.getAlignFrameFor(avp
+                            .getAlignViewport());
+
+                    cra = new CrossRefAction(nextaf, xrseqs, avp
+                            .getAlignViewport().isNucleotide(), xrefdb);
+                    cra.run();
+                    if (cra.getXrefViews().size() == 0)
+                    {
+                      failedXrefMenuItems
+                              .add("No crossrefs retrieved for '"
+                              + nextxref + "' to " + xrefdb + " via '"
+                              + nextaf.getTitle() + "'");
+                      continue;
+                    }
+                    cra_views2 = cra.getXrefViews();
+                    assertNucleotide(cra_views2.get(0),
+                            "Nucleotide panel included proteins for '"
+                                    + nextxref + "' to " + xrefdb
+                                    + " via '" + nextaf.getTitle() + "'");
+                    assertProtein(cra_views2.get(1),
+                            "Protein panel included nucleotides for '"
+                                    + nextxref + "' to " + xrefdb
+                                    + " via '" + nextaf.getTitle() + "'");
+
+                  }
+                  else
+                  {
+                    Desktop.instance.closeAll_actionPerformed(null);
+                    // recover stored project
+                    File storedProject = savedProjects.get(nextnextxref);
+                    if (storedProject == null)
+                    {
+                      failedProjectRecoveries
+                              .add("Failed to store a view for '"
+                                      + nextnextxref + "'");
+                      continue;
+                    }
+                    AlignFrame af2 = new FileLoader(false)
+                            .LoadFileWaitTillLoaded(
+                                    savedProjects.get(nextnextxref)
+                                            .toString(), FormatAdapter.FILE);
+                    System.out.println("Recovered view for '"
+                            + nextnextxref + "' from '"
+                            + savedProjects.get(nextnextxref).toString()
+                            + "'");
+                    // gymnastics to recover the alignPanel/Complementary
+                    // alignPanel
+                    if (af2.getViewport().isNucleotide())
+                    {
+                      // top view, then bottom
+                      cra_views2.add(af2.getViewport().getAlignPanel());
+                      cra_views2.add(((jalview.gui.AlignViewport) af2
+                              .getViewport().getCodingComplement())
+                              .getAlignPanel());
+
+                    }
+                    else
+                    {
+                      // bottom view, then top
+                      cra_views2.add(((jalview.gui.AlignViewport) af2
+                              .getViewport().getCodingComplement())
+                              .getAlignPanel());
+                      cra_views2.add(af2.getViewport().getAlignPanel());
+                    }
+                    Assert.assertEquals(cra_views2.size(), 2);
+                    Assert.assertNotNull(cra_views2.get(0));
+                    Assert.assertNotNull(cra_views2.get(1));
+                  }
+
+                  for (AlignmentViewPanel nextavp : cra_views2)
+                  {
+                    nextnextxref = nextxref
+                            + " -> " + xrefdb + "{" + q++ + "}";
+
+                    // verify references for this panel
+                    AlignmentTest.assertAlignmentDatasetRefs(
+                            nextavp.getAlignment(), "" + "Pass (" + pass1
+                                    + "," + pass2 + "): For "
+                                    + nextnextxref + ":");
+                    assertDatasetIsNormalisedKnownDefect(
+                            nextavp.getAlignment(), "" + "Pass (" + pass1
+                                    + "," + pass2 + "): For "
+                                    + nextnextxref + ":");
+
+                    stringify(dbtoviewBit, savedProjects, nextnextxref,
+                            nextavp);
+                    keyseq.add(nextnextxref);
+                  }
+                } // end of loop around showing all xrefdb for crossrf2
+
+              } // end of loop around all viewpanels from crossrf1
+            } while (pass2 == 2 && pass3++ < 2);
+            // fetchdb->crossref1->crossref-2->verify for xrefs we
+            // either loop twice when pass2=0, or just once when pass2=1
+            // (recovered project from previous crossref)
+
+          } // end of loop over db-xrefs for crossref-2
+
+          // fetchdb-->crossref1
+          // for each xref we try to retrieve xref, store and verify when
+          // pass1=0, or just retrieve and verify when pass1=1
+        } while (pass1 == 1 && pass2++ < 2);
+        // fetchdb
+        // for each ref we
+        // loop twice: first, do the retrieve, second recover from saved project
+
+        // increment pass counters, so we repeat traversal starting from the
+        // oldest saved project first.
+        if (pass1 == 0)
+        {
+          // verify stored projects for first set of cross references
+          pass1 = 1;
+          // and verify cross-references retrieved from stored projects
+          pass2 = 0;
+          pass3 = 0;
+        }
+        else
+        {
+          pass1++;
+        }
+      } while (pass1 < 3);
+    }
+    if (failedXrefMenuItems.size() > 0)
+    {
+      for (String s : failedXrefMenuItems)
+      {
+        System.err.println(s);
+      }
+      Assert.fail("Faulty xref menu (" + failedXrefMenuItems.size()
+              + " counts)");
+    }
+    if (failedProjectRecoveries.size() > 0)
+    {
+
+      for (String s : failedProjectRecoveries)
+      {
+        System.err.println(s);
+      }
+      Assert.fail("Didn't recover projects for some retrievals (did they retrieve ?) ("
+              + failedProjectRecoveries.size() + " counts)");
+    }
+    if (failedDBRetr.size() > 0)
+    {
+      for (String s : failedProjectRecoveries)
+      {
+        System.err.println(s);
+      }
+      Assert.fail("Didn't retrieve some db refs for checking cross-refs ("
+              + failedDBRetr.size() + " counts)");
+    }
+  }
+
+  /**
+   * Wrapper that tolerates the known defect (JAL-2179) for the AH002001 case:
+   * an AssertionError whose message ends with "EMBL|AH002001" is reported on
+   * stdout and swallowed; any other AssertionError is rethrown.
+   * 
+   * @param al
+   *          alignment passed to AlignmentTest.assertDatasetIsNormalised
+   * @param message
+   *          message passed to the check and echoed when the defect is ignored
+   */
+  private void assertDatasetIsNormalisedKnownDefect(AlignmentI al,
+          String message)
+  {
+    try
+    {
+      AlignmentTest.assertDatasetIsNormalised(al, message);
+    } catch (AssertionError ae)
+    {
+      // getMessage() may be null; rethrow then, rather than mask ae with an NPE
+      String detail = ae.getMessage();
+      if (detail == null || !detail.endsWith("EMBL|AH002001"))
+      {
+        throw ae;
+      }
+      System.out.println("Ignored exception for known defect: JAL-2179 : " + message);
+    }
+  }
+
+  private void assertProtein(AlignmentViewPanel alignmentViewPanel,
+          String message)
+  {
+    assertType(true, alignmentViewPanel, message); // true => every sequence must report isProtein()
+  }
+
+  private void assertNucleotide(AlignmentViewPanel alignmentViewPanel,
+          String message)
+  {
+    assertType(false, alignmentViewPanel, message); // false => no sequence may report isProtein()
+  }
+
+  private void assertType(boolean expectProtein,
+          AlignmentViewPanel alignmentViewPanel, String message)
+  {
+    // gather every sequence whose molecule type is not the expected one
+    List<SequenceI> mismatched = new ArrayList<SequenceI>();
+    for (SequenceI candidate : alignmentViewPanel.getAlignViewport()
+            .getAlignment().getSequences())
+    {
+      if (candidate.isProtein() == expectProtein)
+      {
+        continue;
+      }
+      mismatched.add(candidate);
+    }
+    if (mismatched.isEmpty())
+    {
+      return;
+    }
+    String offenders = expectProtein ? "nucleotides were " : "proteins were ";
+    Assert.fail(message + " [ " + offenders + mismatched.toString() + " ]");
+  }
+
+  /**
+   * first time called, record strings derived from alignment and
+   * alignedcodonframes, and save view to a project file. Second time called,
+   * compare strings to existing ones. org.testng.Assert.assertEquals on the
+   * recorded and freshly built strings
+   * 
+   * @param dbtoviewBit
+   *          map between xrefpath and view string
+   * @param savedProjects
+   *          - map from xrefpath to saved project filename (createTempFile)
+   * @param xrefpath
+   *          - xrefpath - unique ID for this context (composed of sequence of
+   *          db-fetch/cross-ref actions preceding state)
+   * @param avp
+   *          - viewpanel to store (for viewpanels in splitframe, the same
+   *          project should be written for both panels, only one needs
+   *          recovering for comparison on the next stringify call, but each
+   *          viewpanel needs to be called with a distinct xrefpath to ensure
+   *          each one's strings are compared)
+   */
+  private void stringify(HashMap<String, String> dbtoviewBit,
+          HashMap<String, File> savedProjects, String xrefpath,
+          AlignmentViewPanel avp)
+  {
+    if (savedProjects != null)
+    {
+      if (savedProjects.get(xrefpath) == null)
+      {
+        // write a project file for this view. On the second pass, this will be
+        // recovered and cross-references verified
+        try
+        {
+          File prfile = File.createTempFile("crossRefTest", ".jvp");
+          AlignFrame af = Desktop.getAlignFrameFor(avp.getAlignViewport());
+          new Jalview2XML(false).saveAlignment(af, prfile.toString(),
+                  af.getTitle());
+          System.out.println("Written view from '" + xrefpath + "' as '"
+                  + prfile.getAbsolutePath() + "'");
+          savedProjects.put(xrefpath, prfile);
+        } catch (IOException q)
+        {
+          Assert.fail("Unexpected IO Exception", q);
+        }
+      }
+      else
+      {
+        System.out.println("Stringify check on view from '" + xrefpath
+                + "' [ possibly retrieved from '"
+                + savedProjects.get(xrefpath).getAbsolutePath() + "' ]");
+
+      }
+    }
+
+    // build the comparison string: alignment, then dataset, then codon frames
+    StringBuilder sbr = new StringBuilder();
+    sbr.append(avp.getAlignment().toString());
+    sbr.append("\n");
+    sbr.append("<End of alignment>");
+    sbr.append("\n");
+    sbr.append(avp.getAlignment().getDataset());
+    sbr.append("\n");
+    sbr.append("<End of dataset>");
+    sbr.append("\n");
+    int p = 0;
+    if (avp.getAlignment().getCodonFrames() != null)
+    {
+      for (AlignedCodonFrame ac : avp.getAlignment().getCodonFrames())
+      {
+        sbr.append("<AlignedCodonFrame " + p++ + ">");
+        sbr.append("\n");
+        sbr.append(ac.toString());
+        sbr.append("\n");
+      }
+    }
+    String dbt = dbtoviewBit.get(xrefpath);
+    if (dbt == null)
+    {
+      dbtoviewBit.put(xrefpath, sbr.toString()); // first call for this xrefpath: record only
+    }
+    else
+    {
+      Assert.assertEquals(sbr.toString(), dbt, "stringify mismatch for "
+              + xrefpath);
+    }
+  }
+}
diff --git a/test/jalview/io/Jalview2xmlBase.java b/test/jalview/io/Jalview2xmlBase.java
new file mode 100644 (file)
index 0000000..379fd68
--- /dev/null
@@ -0,0 +1,76 @@
+package jalview.io;
+
+import jalview.bin.Cache;
+import jalview.bin.Jalview;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.SequenceI;
+import jalview.gui.Desktop;
+
+import java.util.Date;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeTest;
+
+public class Jalview2xmlBase
+{
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @BeforeClass(alwaysRun = true)
+  public static void setUpBeforeClass() throws Exception
+  {
+    /*
+     * use read-only test properties file
+     */
+    Cache.loadProperties("test/jalview/io/testProps.jvprops");
+
+    /*
+     * set news feed last read to a future time to ensure no
+     * 'unread' news item is displayed
+     */
+    Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000);
+    Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow);
+
+    Jalview.main(new String[] {});
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @AfterClass(alwaysRun = true)
+  public static void tearDownAfterClass() throws Exception
+  {
+    jalview.gui.Desktop.instance.closeAll_actionPerformed(null);
+  }
+
+  @BeforeTest(alwaysRun = true)
+  public static void clearDesktop()
+  {
+    if (Desktop.instance != null && Desktop.getAlignFrames() != null)
+    {
+      Desktop.instance.closeAll_actionPerformed(null);
+    }
+  }
+
+  public int countDsAnn(jalview.viewmodel.AlignmentViewport avp)
+  {
+    int numdsann = 0;
+    for (SequenceI sq : avp.getAlignment().getDataset().getSequences())
+    {
+      if (sq.getAnnotation() != null)
+      {
+        for (AlignmentAnnotation dssa : sq.getAnnotation())
+        {
+          if (dssa.isValidStruc())
+          {
+            numdsann++;
+          }
+        }
+      }
+    }
+    return numdsann;
+  }
+
+}
index 784f3dd..f7853ff 100644 (file)
@@ -29,8 +29,6 @@ import static org.testng.AssertJUnit.assertTrue;
 import jalview.api.AlignViewportI;
 import jalview.api.AlignmentViewPanel;
 import jalview.api.ViewStyleI;
-import jalview.bin.Cache;
-import jalview.bin.Jalview;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.HiddenSequences;
@@ -49,70 +47,18 @@ import jalview.viewmodel.AlignmentViewport;
 
 import java.io.File;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import org.testng.Assert;
 import org.testng.AssertJUnit;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 @Test(singleThreaded = true)
-public class Jalview2xmlTests
+public class Jalview2xmlTests extends Jalview2xmlBase
 {
 
-  /**
-   * @throws java.lang.Exception
-   */
-  @BeforeClass(alwaysRun = true)
-  public static void setUpBeforeClass() throws Exception
-  {
-    /*
-     * use read-only test properties file
-     */
-    Cache.loadProperties("test/jalview/io/testProps.jvprops");
-
-    /*
-     * set news feed last read to a future time to ensure no
-     * 'unread' news item is displayed
-     */
-    Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000);
-    Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow);
-
-    Jalview.main(new String[] {});
-  }
-
-  /**
-   * @throws java.lang.Exception
-   */
-  @AfterClass(alwaysRun = true)
-  public static void tearDownAfterClass() throws Exception
-  {
-    Desktop.instance.closeAll_actionPerformed(null);
-  }
-
-  int countDsAnn(jalview.viewmodel.AlignmentViewport avp)
-  {
-    int numdsann = 0;
-    for (SequenceI sq : avp.getAlignment().getDataset().getSequences())
-    {
-      if (sq.getAnnotation() != null)
-      {
-        for (AlignmentAnnotation dssa : sq.getAnnotation())
-        {
-          if (dssa.isValidStruc())
-          {
-            numdsann++;
-          }
-        }
-      }
-    }
-    return numdsann;
-  }
-
   @Test(groups = { "Functional" })
   public void testRNAStructureRecovery() throws Exception
   {
diff --git a/test/jalview/io/testProps_nodas.jvprops b/test/jalview/io/testProps_nodas.jvprops
new file mode 100644 (file)
index 0000000..da95549
--- /dev/null
@@ -0,0 +1,83 @@
+#---JalviewX Properties File---
+#Fri Apr 25 09:54:25 BST 2014
+SCREEN_Y=768
+SCREEN_X=936
+SHOW_WSDISCOVERY_ERRORS=true
+LATEST_VERSION=2.8.0b1
+SHOW_CONSERVATION=true
+JALVIEW_RSS_WINDOW_SCREEN_WIDTH=550
+JAVA_CONSOLE_SCREEN_WIDTH=450
+LAST_DIRECTORY=/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples
+ID_ITALICS=true
+SORT_ALIGNMENT=No sort
+SHOW_IDENTITY=true
+WSMENU_BYHOST=false
+SEQUENCE_LINKS=EMBL-EBI Search|http\://www.ebi.ac.uk/ebisearch/search.ebi?db\=allebi&query\=$SEQUENCE_ID$
+SHOW_FULLSCREEN=false
+RECENT_URL=http\://www.jalview.org/examples/exampleFile_2_7.jar
+FONT_NAME=SansSerif
+BLC_JVSUFFIX=true
+VERSION_CHECK=false
+YEAR=2011
+SHOW_DBREFS_TOOLTIP=true
+MSF_JVSUFFIX=true
+SCREENGEOMETRY_HEIGHT=1600
+JAVA_CONSOLE_SCREEN_Y=475
+JAVA_CONSOLE_SCREEN_X=830
+PFAM_JVSUFFIX=true
+PIR_JVSUFFIX=true
+STARTUP_FILE=http\://www.jalview.org/examples/exampleFile_2_3.jar
+JAVA_CONSOLE_SCREEN_HEIGHT=162
+PIR_MODELLER=false
+GAP_SYMBOL=-
+SHOW_QUALITY=true
+SHOW_GROUP_CONSERVATION=false
+SHOW_JWS2_SERVICES=true
+SHOW_NPFEATS_TOOLTIP=true
+FONT_STYLE=plain
+ANTI_ALIAS=false
+SORT_BY_TREE=false
+RSBS_SERVICES=|Multi-Harmony|Analysis|Sequence Harmony and Multi-Relief (Brandt et al. 2010)|hseparable,gapCharacter\='-',returns\='ANNOTATION'|?tool\=jalview|http\://zeus.few.vu.nl/programs/shmrwww/index.php?tool\=jalview&groups\=$PARTITION\:min\='2',minsize\='2',sep\=' '$&ali_file\=$ALIGNMENT\:format\='FASTA',writeasfile$
+AUTHORFNAMES=Jim Procter, Andrew Waterhouse, Jan Engelhardt, Lauren Lui, Michele Clamp, James Cuff, Steve Searle, David Martin & Geoff Barton
+JALVIEW_RSS_WINDOW_SCREEN_HEIGHT=328
+SHOW_GROUP_CONSENSUS=false
+SHOW_CONSENSUS_HISTOGRAM=true
+SHOW_OVERVIEW=false
+AUTHORS=J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
+FIGURE_AUTOIDWIDTH=false
+SCREEN_WIDTH=900
+ANNOTATIONCOLOUR_MIN=ffc800
+SHOW_STARTUP_FILE=false
+RECENT_FILE=examples/uniref50.fa\t/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples/RF00031_folded.stk\t/Volumes/Data/Users/jimp/bs_ig_mult.out
+DEFAULT_FILE_FORMAT=FASTA
+SHOW_JAVA_CONSOLE=false
+VERSION=2.8b1
+FIGURE_USERIDWIDTH=
+WSMENU_BYTYPE=false
+DEFAULT_COLOUR=None
+NOQUESTIONNAIRES=true
+JALVIEW_NEWS_RSS_LASTMODIFIED=Apr 23, 2014 2\:53\:26 PM
+BUILD_DATE=01 November 2013
+PILEUP_JVSUFFIX=true
+SHOW_CONSENSUS_LOGO=false
+SCREENGEOMETRY_WIDTH=2560
+SHOW_ANNOTATIONS=true
+JALVIEW_RSS_WINDOW_SCREEN_Y=0
+USAGESTATS=false
+JALVIEW_RSS_WINDOW_SCREEN_X=0
+SHOW_UNCONSERVED=false
+SHOW_JVSUFFIX=true
+SCREEN_HEIGHT=650
+ANNOTATIONCOLOUR_MAX=ff0000
+AUTO_CALC_CONSENSUS=true
+FASTA_JVSUFFIX=true
+DAS_ACTIVE_SOURCE=
+JWS2HOSTURLS=http\://www.compbio.dundee.ac.uk/jabaws
+PAD_GAPS=false
+CLUSTAL_JVSUFFIX=true
+SHOW_ENFIN_SERVICES=true
+FONT_SIZE=10
+RIGHT_ALIGN_IDS=false
+USE_PROXY=false
+WRAP_ALIGNMENT=false
+DAS_REGISTRY_URL=http\://www.nowhere/
index b3c7e10..59bf445 100644 (file)
@@ -37,6 +37,7 @@ import jalview.ws.dbsources.Pdb;
 import jalview.ws.dbsources.Uniprot;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import org.testng.annotations.AfterClass;
@@ -75,7 +76,9 @@ public class DbRefFetcherTest
   @Test(groups = { "Functional" })
   public void testStandardProtDbs()
   {
-    String[] defdb = DBRefSource.PROTEINDBS;
+    List<String> defdb = new ArrayList<String>();
+    defdb.addAll(Arrays.asList(DBRefSource.PROTEINDBS));
+    defdb.add(DBRefSource.PDB);
     List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
     SequenceFetcher sfetcher = new SequenceFetcher();
     boolean pdbFound = false;