ensure successive matches to a regex have distinct annotation names (index suffix...
[jalview.git] / src / jalview / analysis / CrossRef.java
index d2f0358..159d96a 100644 (file)
@@ -11,8 +11,8 @@ import jalview.datamodel.DBRefSource;
 import jalview.datamodel.DBRefEntry;\r
 import jalview.datamodel.Sequence;\r
 import jalview.datamodel.SequenceI;\r
-import jalview.ws.ASequenceFetcher;\r
 import jalview.ws.SequenceFetcher;\r
+import jalview.ws.seqfetcher.ASequenceFetcher;\r
 \r
 /**\r
  * Functions for cross-referencing sequence databases. user must first specify\r
@@ -40,8 +40,8 @@ public class CrossRef
     {\r
       rfs = jalview.util.DBRefUtils.selectRefs(rfs,\r
               DBRefSource.DNACODINGDBS); // could attempt to find other cross\r
-                                          // refs and return here - ie PDB xrefs\r
-                                          // (not dna, not protein seq)\r
+      // refs and return here - ie PDB xrefs\r
+      // (not dna, not protein seq)\r
     }\r
     return rfs;\r
   }\r
@@ -158,7 +158,8 @@ public class CrossRef
       {\r
         if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))\r
         {\r
-          // retrieve CDS dataset sequences\r
+          System.err.println("TODO: unimplemented sequence retrieval for coding region sequence.");\r
+          // TODO: retrieve CDS dataset sequences\r
           // need global dataset sequence retriever/resolver to reuse refs\r
           // and construct Mapping entry.\r
           // insert gaps in CDS according to peptide gaps.\r
@@ -216,13 +217,13 @@ public class CrossRef
       {\r
         System.out.println("Attempting to find ds Xrefs refs.");\r
         DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less\r
-                                                                            // ambiguous\r
-                                                                            // would\r
-                                                                            // be a\r
-                                                                            // 'find\r
-                                                                            // primary\r
-                                                                            // dbRefEntry'\r
-                                                                            // method.\r
+        // ambiguous\r
+        // would\r
+        // be a\r
+        // 'find\r
+        // primary\r
+        // dbRefEntry'\r
+        // method.\r
         // filter for desired source xref here\r
         found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,\r
                 rseqs, cf);\r
@@ -305,7 +306,7 @@ public class CrossRef
             xrfs = t;\r
             try\r
             {\r
-              retrieved = sftch.getSequences(xrfs);\r
+              retrieved = sftch.getSequences(xrfs); // problem here is we don't know which of xrfs resulted in which retrieved element\r
             } catch (Exception e)\r
             {\r
               System.err\r
@@ -317,6 +318,46 @@ public class CrossRef
             {\r
               for (int rs = 0; rs < retrieved.length; rs++)\r
               {\r
+                // TODO: examine each sequence for 'redundancy'\r
+                jalview.datamodel.DBRefEntry[] dbr = retrieved[rs].getDBRef();\r
+                if (dbr != null && dbr.length > 0)\r
+                {\r
+                  for (int di = 0; di < dbr.length; di++)\r
+                  {\r
+                    // find any entry where we should put in the sequence being cross-referenced into the map\r
+                    jalview.datamodel.Mapping map = dbr[di].getMap();\r
+                    if (map != null)\r
+                    {\r
+                      if (map.getTo() != null && map.getMap() != null)\r
+                      {\r
+                        // should search the local dataset to find any existing candidates for To !\r
+                        try\r
+                        {\r
+                          // compare ms with dss and replace with dss in mapping if map is congruent\r
+                          SequenceI ms = map.getTo();\r
+                          int sf = map.getMap().getToLowest();\r
+                          int st = map.getMap().getToHighest();\r
+                          SequenceI mappedrg = ms.getSubSequence(sf, st);\r
+                          SequenceI loc = dss.getSubSequence(sf, st);\r
+                          if (mappedrg.getLength()>0 && mappedrg.getSequenceAsString().equals(\r
+                                  loc.getSequenceAsString()))\r
+                          {\r
+                            System.err\r
+                                    .println("Mapping updated for retrieved crossreference");\r
+                            // method to update all refs of existing To on retrieved sequence with dss and merge any props on To onto dss.\r
+                            map.setTo(dss);\r
+                          }\r
+                        } catch (Exception e)\r
+                        {\r
+                          System.err\r
+                                  .println("Exception when consolidating Mapped sequence set...");\r
+                          e.printStackTrace(System.err);\r
+                        }\r
+                      }\r
+                    }\r
+                  }\r
+                }\r
+                retrieved[rs].updatePDBIds();\r
                 rseqs.addElement(retrieved[rs]);\r
               }\r
             }\r
@@ -404,6 +445,7 @@ public class CrossRef
           boolean direct, boolean dna)\r
   {\r
     boolean found = false;\r
+    SequenceI[] typer=new SequenceI[1];\r
     if (dataset == null)\r
       return false;\r
     if (dataset.getSequences() == null)\r
@@ -424,6 +466,17 @@ public class CrossRef
         }\r
         if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())\r
         {\r
+          // check if this is the correct sequence type\r
+          {\r
+            typer[0] = nxt;\r
+            boolean isDna = jalview.util.Comparison.isNucleotide(typer);\r
+            if ((direct && isDna == dna) || (!direct && isDna!=dna))\r
+            {\r
+              // skip this sequence: it is not the molecule type required for this (direct or indirect) search\r
+              continue;\r
+            }\r
+          }\r
+\r
           // look for direct or indirect references in common\r
           DBRefEntry[] poss = null, cands = null;\r
           if (direct)\r
@@ -442,7 +495,7 @@ public class CrossRef
             {\r
               rseqs.addElement(nxt);\r
               boolean foundmap = cf != null; // don't search if we aren't given\r
-                                              // a codon map object\r
+              // a codon map object\r
               for (int r = 0; foundmap && r < cands.length; r++)\r
               {\r
                 if (cands[r].hasMap())\r