JAL-2210 factored out code for processing cross-referenced sequences retrieved via...
author Jim Procter <jprocter@issues.jalview.org>
Sat, 1 Oct 2016 12:21:29 +0000 (13:21 +0100)
committer Jim Procter <jprocter@issues.jalview.org>
Sat, 1 Oct 2016 12:21:29 +0000 (13:21 +0100)
src/jalview/analysis/CrossRef.java

index 05814c2..6779b87 100644 (file)
@@ -428,8 +428,16 @@ public class CrossRef
         // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL->
         SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence
                 : retrievedSequence.getDatasetSequence();
-        DBRefEntry[] dbr = retrievedSequence.getDBRefs();
-        if (dbr != null)
+        importCrossRefSeq(cf, dss, retrievedDss);
+        rseqs.add(retrievedDss);
+        if (dataset.findIndex(retrievedDss) == -1)
+        {
+          dataset.addSequence(retrievedDss);
+          matcher.add(retrievedDss);
+        }
+      }
+    }
+  }
 
   /**
    * Search dataset for sequences with a primary reference contained in
@@ -467,122 +475,125 @@ public class CrossRef
     }
   }
 
+  /**
+   * process sequence retrieved via a dbref on source sequence to resolve and
+   * transfer data
+   * 
+   * @param cf
+   * @param sourceSequence
+   * @param retrievedSequence
+   */
+  private void importCrossRefSeq(AlignedCodonFrame cf,
+          SequenceI sourceSequence, SequenceI retrievedSequence)
+  {
+    DBRefEntry[] dbr = retrievedSequence.getDBRefs();
+    if (dbr != null)
+    {
+      for (DBRefEntry dbref : dbr)
+      {
+        // find any entry where we should put in the sequence being
+        // cross-referenced into the map
+        Mapping map = dbref.getMap();
+        if (map != null)
         {
-          for (DBRefEntry dbref : dbr)
+          if (map.getTo() != null && map.getMap() != null)
           {
-            // find any entry where we should put in the sequence being
-            // cross-referenced into the map
-            Mapping map = dbref.getMap();
-            if (map != null)
+            // TODO findInDataset requires exact sequence match but
+            // 'congruent' test is only for the mapped part
+            // maybe not a problem in practice since only ENA provide a
+            // mapping and it is to the full protein translation of CDS
+            SequenceI matched = findInDataset(dbref);
+            // matcher.findIdMatch(map.getTo());
+            if (matched != null)
             {
-              if (map.getTo() != null && map.getMap() != null)
+              /*
+               * already got an xref to this sequence; update this
+               * map to point to the same sequence, and add
+               * any new dbrefs to it
+               */
+              DBRefEntry[] toRefs = map.getTo().getDBRefs();
+              if (toRefs != null)
               {
-                // TODO findInDataset requires exact sequence match but
-                // 'congruent' test is only for the mapped part
-                // maybe not a problem in practice since only ENA provide a
-                // mapping and it is to the full protein translation of CDS
-                SequenceI matched = findInDataset(dbref);
-                // matcher.findIdMatch(map.getTo());
-                if (matched != null)
+                for (DBRefEntry ref : toRefs)
                 {
-                  /*
-                   * already got an xref to this sequence; update this
-                   * map to point to the same sequence, and add
-                   * any new dbrefs to it
-                   */
-                  DBRefEntry[] toRefs = map.getTo().getDBRefs();
-                  if (toRefs != null)
-                  {
-                    for (DBRefEntry ref : toRefs)
-                    {
-                      matched.addDBRef(ref); // add or update mapping
-                    }
-                  }
-                  map.setTo(matched);
+                  matched.addDBRef(ref); // add or update mapping
                 }
-                else
-                {
-                  if (dataset.findIndex(map.getTo()) == -1)
-                  {
-                    dataset.addSequence(map.getTo());
-                    matcher.add(map.getTo());
-                  }
-                }
-                try
+              }
+              map.setTo(matched);
+            }
+            else
+            {
+              if (dataset.findIndex(map.getTo()) == -1)
+              {
+                dataset.addSequence(map.getTo());
+                matcher.add(map.getTo());
+              }
+            }
+
+            try
+            {
+              // compare ms with dss and replace with dss in mapping
+              // if map is congruent
+              SequenceI ms = map.getTo();
+              int sf = map.getMap().getToLowest();
+              int st = map.getMap().getToHighest();
+              SequenceI mappedrg = ms.getSubSequence(sf, st);
+              if (mappedrg.getLength() > 0
+                      && ms.getSequenceAsString().equals(
+                              sourceSequence.getSequenceAsString()))
+              {
+                String msg = "Mapping updated from " + ms.getName()
+                        + " to retrieved crossreference "
+                        + sourceSequence.getName();
+                System.out.println(msg);
+                map.setTo(sourceSequence);
+
+                /*
+                 * give the reverse reference the inverse mapping 
+                 * (if it doesn't have one already)
+                 */
+                setReverseMapping(sourceSequence, dbref, cf);
+
+                /*
+                 * copy sequence features as well, avoiding
+                 * duplication (e.g. same variation from two 
+                 * transcripts)
+                 */
+                SequenceFeature[] sfs = ms.getSequenceFeatures();
+                if (sfs != null)
                 {
-                  // compare ms with dss and replace with dss in mapping
-                  // if map is congruent
-                  SequenceI ms = map.getTo();
-                  int sf = map.getMap().getToLowest();
-                  int st = map.getMap().getToHighest();
-                  SequenceI mappedrg = ms.getSubSequence(sf, st);
-                  // SequenceI loc = dss.getSubSequence(sf, st);
-                  if (mappedrg.getLength() > 0
-                          && ms.getSequenceAsString().equals(
-                                  dss.getSequenceAsString()))
-                  // && mappedrg.getSequenceAsString().equals(
-                  // loc.getSequenceAsString()))
+                  for (SequenceFeature feat : sfs)
                   {
-                    String msg = "Mapping updated from " + ms.getName()
-                            + " to retrieved crossreference "
-                            + dss.getName();
-                    System.out.println(msg);
-                    map.setTo(dss);
-
-                    /*
-                     * give the reverse reference the inverse mapping 
-                     * (if it doesn't have one already)
-                     */
-                    setReverseMapping(dss, dbref, cf);
-
                     /*
-                     * copy sequence features as well, avoiding
-                     * duplication (e.g. same variation from two 
-                     * transcripts)
+                     * make a flyweight feature object which ignores Parent
+                     * attribute in equality test; this avoids creating many
+                     * otherwise duplicate exon features on genomic sequence
                      */
-                    SequenceFeature[] sfs = ms.getSequenceFeatures();
-                    if (sfs != null)
+                    SequenceFeature newFeature = new SequenceFeature(
+                            feat)
                     {
-                      for (SequenceFeature feat : sfs)
+                      @Override
+                      public boolean equals(Object o)
                       {
-                        /*
-                         * make a flyweight feature object which ignores Parent
-                         * attribute in equality test; this avoids creating many
-                         * otherwise duplicate exon features on genomic sequence
-                         */
-                        SequenceFeature newFeature = new SequenceFeature(
-                                feat)
-                        {
-                          @Override
-                          public boolean equals(Object o)
-                          {
-                            return super.equals(o, true);
-                          }
-                        };
-                        dss.addSequenceFeature(newFeature);
+                        return super.equals(o, true);
                       }
-                    }
+                    };
+                    sourceSequence.addSequenceFeature(newFeature);
                   }
-                  cf.addMap(retrievedDss, map.getTo(), map.getMap());
-                } catch (Exception e)
-                {
-                  System.err
-                          .println("Exception when consolidating Mapped sequence set...");
-                  e.printStackTrace(System.err);
                 }
               }
+              cf.addMap(retrievedSequence, map.getTo(), map.getMap());
+            } catch (Exception e)
+            {
+              System.err
+                      .println("Exception when consolidating Mapped sequence set...");
+              e.printStackTrace(System.err);
             }
           }
         }
-        retrievedSequence.updatePDBIds();
-        rseqs.add(retrievedDss);
-        if (dataset.findIndex(retrievedDss) == -1)
-        {
-          dataset.addSequence(retrievedDss);
-          matcher.add(retrievedDss);
-        }
       }
     }
+    retrievedSequence.updatePDBIds();
   }
   /**
    * Sets the inverse sequence mapping in the corresponding dbref of the mapped