import jalview.datamodel.DBRefEntry;\r
import jalview.datamodel.Sequence;\r
import jalview.datamodel.SequenceI;\r
-import jalview.ws.ASequenceFetcher;\r
import jalview.ws.SequenceFetcher;\r
+import jalview.ws.seqfetcher.ASequenceFetcher;\r
\r
/**\r
* Functions for cross-referencing sequence databases. user must first specify\r
else\r
{\r
rfs = jalview.util.DBRefUtils.selectRefs(rfs,\r
- DBRefSource.DNACODINGDBS); // could attempt to find other cross refs and return here - ie PDB xrefs (not dna, not protein seq)\r
+ DBRefSource.DNACODINGDBS); // could attempt to find other cross\r
+ // refs and return here - ie PDB xrefs\r
+ // (not dna, not protein seq)\r
}\r
return rfs;\r
}\r
\r
-\r
public static Hashtable classifyDbRefs(DBRefEntry[] rfs)\r
{\r
Hashtable classes = new Hashtable();\r
- classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS));\r
- classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils.selectRefs(rfs,\r
- DBRefSource.DNACODINGDBS));\r
- classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs(rfs,\r
- DBRefSource.DOMAINDBS));\r
+ classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs(\r
+ rfs, DBRefSource.PROTEINDBS));\r
+ classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils\r
+ .selectRefs(rfs, DBRefSource.DNACODINGDBS));\r
+ classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs(\r
+ rfs, DBRefSource.DOMAINDBS));\r
// classes.put(OTHER, )\r
return classes;\r
}\r
\r
/**\r
* @param dna\r
- * true if seqs are DNA seqs\r
+ * true if seqs are DNA seqs\r
* @param seqs\r
* @return a list of sequence database cross reference source types\r
*/\r
{\r
return findSequenceXrefTypes(dna, seqs, null);\r
}\r
+\r
/**\r
- * Indirect references are references from other sequences from the dataset to any of the direct\r
- * DBRefEntrys on the given sequences.\r
+ * Indirect references are references from other sequences from the dataset to\r
+ * any of the direct DBRefEntrys on the given sequences.\r
+ * \r
* @param dna\r
- * true if seqs are DNA seqs\r
+ * true if seqs are DNA seqs\r
* @param seqs\r
* @return a list of sequence database cross reference source types\r
*/\r
- public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs, AlignmentI dataset)\r
+ public static String[] findSequenceXrefTypes(boolean dna,\r
+ SequenceI[] seqs, AlignmentI dataset)\r
{\r
String[] dbrefs = null;\r
Vector refs = new Vector();\r
for (int s = 0; s < seqs.length; s++)\r
{\r
SequenceI dss = seqs[s];\r
- while (dss.getDatasetSequence()!=null)\r
+ while (dss.getDatasetSequence() != null)\r
{\r
dss = dss.getDatasetSequence();\r
}\r
refs.addElement(rfs[r].getSource());\r
}\r
}\r
- if (dataset!=null)\r
+ if (dataset != null)\r
{\r
// search for references to this sequence's direct references.\r
DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef());\r
Vector rseqs = new Vector();\r
- CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, null); // don't need to specify codon frame for mapping here\r
+ CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs,\r
+ null); // don't need to specify codon frame for mapping here\r
Enumeration lr = rseqs.elements();\r
while (lr.hasMoreElements())\r
{\r
SequenceI rs = (SequenceI) lr.nextElement();\r
DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());\r
- for (int r=0; rfs != null && r < rfs.length; r++)\r
+ for (int r = 0; rfs != null && r < rfs.length; r++)\r
{\r
if (!refs.contains(rfs[r].getSource()))\r
{\r
* @param dna\r
* @param source\r
* @param dataset\r
- * alignment to search for product sequences.\r
+ * alignment to search for product sequences.\r
* @return products (as dataset sequences)\r
*/\r
public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,\r
{\r
Vector rseqs = new Vector();\r
Alignment ral = null;\r
- AlignedCodonFrame cf=new AlignedCodonFrame(0); // nominal width\r
+ AlignedCodonFrame cf = new AlignedCodonFrame(0); // nominal width\r
for (int s = 0; s < seqs.length; s++)\r
{\r
SequenceI dss = seqs[s];\r
- while (dss.getDatasetSequence()!=null)\r
+ while (dss.getDatasetSequence() != null)\r
{\r
dss = dss.getDatasetSequence();\r
}\r
boolean found = false;\r
DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRef());\r
- if ((xrfs == null || xrfs.length == 0) && dataset!=null)\r
+ if ((xrfs == null || xrfs.length == 0) && dataset != null)\r
{\r
System.out.println("Attempting to find ds Xrefs refs.");\r
- DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less ambiguous would be a 'find primary dbRefEntry' method.\r
+ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less\r
+ // ambiguous would be a 'find primary dbRefEntry' method.\r
// filter for desired source xref here\r
- found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, rseqs, cf);\r
+ found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,\r
+ rseqs, cf);\r
}\r
- for (int r = 0; xrfs!=null && r < xrfs.length; r++)\r
+ for (int r = 0; xrfs != null && r < xrfs.length; r++)\r
{\r
if (source != null && !source.equals(xrfs[r].getSource()))\r
continue;\r
{\r
Sequence rsq = new Sequence(xrfs[r].getMap().getTo());\r
rseqs.addElement(rsq);\r
- if (xrfs[r].getMap().getMap().getFromRatio()!=xrfs[r].getMap().getMap().getToRatio())\r
+ if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r]\r
+ .getMap().getMap().getToRatio())\r
{\r
// get sense of map correct for adding to product alignment.\r
if (dna)\r
{\r
// map is from dna seq to a protein product\r
cf.addMap(dss, rsq, xrfs[r].getMap().getMap());\r
- } else {\r
+ }\r
+ else\r
+ {\r
// map should be from protein seq to its coding dna\r
cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse());\r
}\r
found = true;\r
}\r
}\r
- else\r
+ if (!found)\r
{\r
// do a bit more work - search for sequences with references matching\r
// xrefs on this sequence.\r
if (dataset != null)\r
{\r
- found = searchDataset(dss, xrfs[r], dataset, rseqs, cf);\r
+ found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf);\r
if (found)\r
xrfs[r] = null; // we've recovered seqs for this one.\r
}\r
for (int r = 0; r < xrfs.length; r++)\r
{\r
// filter out any irrelevant or irretrievable references\r
- if (xrfs[r]==null || ((source != null && !source.equals(xrfs[r].getSource()))\r
- || !sftch.isFetchable(xrfs[r].getSource())))\r
+ if (xrfs[r] == null\r
+ || ((source != null && !source.equals(xrfs[r]\r
+ .getSource())) || !sftch.isFetchable(xrfs[r]\r
+ .getSource())))\r
{\r
l--;\r
xrfs[r] = null;\r
if (l > 0)\r
{\r
System.out\r
- .println("Attempting to retrieve cross referenced sequences.");\r
+ .println("Attempting to retrieve cross referenced sequences.");\r
DBRefEntry[] t = new DBRefEntry[l];\r
l = 0;\r
for (int r = 0; r < xrfs.length; r++)\r
xrfs = t;\r
try\r
{\r
- retrieved = sftch.getSequences(xrfs);\r
+ retrieved = sftch.getSequences(xrfs); // problem here is we don't know which of xrfs resulted in which retrieved element\r
} catch (Exception e)\r
{\r
System.err\r
- .println("Problem whilst retrieving cross references for Sequence : "\r
- + seqs[s].getName());\r
+ .println("Problem whilst retrieving cross references for Sequence : "\r
+ + seqs[s].getName());\r
e.printStackTrace();\r
}\r
if (retrieved != null)\r
{\r
for (int rs = 0; rs < retrieved.length; rs++)\r
{\r
+ // TODO: examine each sequence for 'redundancy'\r
+ jalview.datamodel.DBRefEntry[] dbr = retrieved[rs].getDBRef();\r
+ if (dbr != null && dbr.length > 0)\r
+ {\r
+ for (int di = 0; di < dbr.length; di++)\r
+ {\r
+ // find any entry where the sequence being cross-referenced should be put into the map\r
+ jalview.datamodel.Mapping map = dbr[di].getMap();\r
+ if (map != null)\r
+ {\r
+ if (map.getTo() != null && map.getMap() != null)\r
+ {\r
+ // should search the local dataset to find any existing candidates for To !\r
+ try\r
+ {\r
+ // compare ms with dss and replace with dss in mapping if map is congruent\r
+ SequenceI ms = map.getTo();\r
+ int sf = map.getMap().getToLowest();\r
+ int st = map.getMap().getToHighest();\r
+ SequenceI mappedrg = ms.getSubSequence(sf, st);\r
+ SequenceI loc = dss.getSubSequence(sf, st);\r
+ if (mappedrg.getLength()>0 && mappedrg.getSequenceAsString().equals(\r
+ loc.getSequenceAsString()))\r
+ {\r
+ System.err\r
+ .println("Mapping updated for retrieved crossreference");\r
+ // method to update all refs of existing To on retrieved sequence with dss and merge any props on To onto dss.\r
+ map.setTo(dss);\r
+ }\r
+ } catch (Exception e)\r
+ {\r
+ System.err\r
+ .println("Exception when consolidating Mapped sequence set...");\r
+ e.printStackTrace(System.err);\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ retrieved[rs].updatePDBIds();\r
rseqs.addElement(retrieved[rs]);\r
}\r
}\r
SequenceI[] rsqs = new SequenceI[rseqs.size()];\r
rseqs.copyInto(rsqs);\r
ral = new Alignment(rsqs);\r
- if (cf!=null && cf.getProtMappings()!=null)\r
+ if (cf != null && cf.getProtMappings() != null)\r
{\r
ral.addCodonFrame(cf);\r
}\r
}\r
\r
/**\r
- * find references to lrfs in the cross-reference set of each sequence in dataset (that is not equal to sequenceI)\r
- * Identifies matching DBRefEntry based on source and accession string only - Map and Version are nulled.\r
+ * find references to lrfs in the cross-reference set of each sequence in\r
+ * dataset (that is not equal to sequenceI). Identifies matching DBRefEntry\r
+ * based on source and accession string only - Map and Version are nulled.\r
+ * \r
* @param sequenceI\r
* @param lrfs\r
* @param dataset\r
* @param rseqs\r
* @return true if matches were found.\r
*/\r
- private static boolean searchDatasetXrefs(SequenceI sequenceI, boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf)\r
+ private static boolean searchDatasetXrefs(SequenceI sequenceI,\r
+ boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs,\r
+ AlignedCodonFrame cf)\r
{\r
- boolean found=false;\r
- if (lrfs==null)\r
+ boolean found = false;\r
+ if (lrfs == null)\r
return false;\r
- for (int i=0;i<lrfs.length; i++)\r
+ for (int i = 0; i < lrfs.length; i++)\r
{\r
DBRefEntry xref = new DBRefEntry(lrfs[i]);\r
// add in wildcards\r
return found;\r
}\r
\r
-\r
/**\r
* search a given sequence dataset for references matching cross-references to\r
* the given sequence\r
* @param xrf\r
* @param dataset\r
* @param rseqs\r
+ * set of unique sequences\r
* @param cf\r
- * @return true if sequences were found and added\r
+ * @return true if one or more unique sequences were found and added\r
*/\r
public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,\r
AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf)\r
{\r
- return searchDataset(sequenceI, xrf,\r
- dataset, rseqs, cf, true, false);\r
+ return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);\r
}\r
+\r
/**\r
- * TODO: generalise to different protein classifications\r
- * Search dataset for DBRefEntrys matching the given one (xrf) and add\r
- * the associated sequence to rseq.\r
+ * TODO: generalise to different protein classifications. Search dataset for\r
+ * DBRefEntrys matching the given one (xrf) and add the associated sequence to\r
+ * rseqs.\r
+ * \r
* @param sequenceI\r
* @param xrf\r
* @param dataset\r
* @param rseqs\r
- * @param direct - search all references or only subset\r
- * @param dna search dna or protein xrefs (if direct=false)\r
+ * @param direct\r
+ * search all references or only subset\r
+ * @param dna\r
+ * search dna or protein xrefs (if direct=false)\r
* @return true if relationship found and sequence added.\r
*/\r
public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,\r
- AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf, boolean direct, boolean dna)\r
+ AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf,\r
+ boolean direct, boolean dna)\r
{\r
boolean found = false;\r
- if (dataset==null) \r
+ if (dataset == null)\r
return false;\r
- if (dataset.getSequences()==null)\r
+ if (dataset.getSequences() == null)\r
{\r
System.err.println("Empty dataset sequence set - NO VECTOR");\r
return false;\r
if (nxt.getDatasetSequence() != null)\r
{\r
System.err\r
- .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");\r
+ .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");\r
}\r
if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())\r
{\r
- DBRefEntry[] poss=null, cands=null;\r
+ // look for direct or indirect references in common\r
+ DBRefEntry[] poss = null, cands = null;\r
if (direct)\r
{\r
- cands = jalview.util.DBRefUtils.searchRefs(poss=nxt\r
+ cands = jalview.util.DBRefUtils.searchRefs(poss = nxt\r
.getDBRef(), xrf);\r
- } else {\r
- cands = jalview.util.DBRefUtils.searchRefs(\r
- poss=CrossRef.findXDbRefs(dna, nxt.getDBRef()), xrf);\r
+ }\r
+ else\r
+ {\r
+ cands = jalview.util.DBRefUtils.searchRefs(poss = CrossRef\r
+ .findXDbRefs(dna, nxt.getDBRef()), xrf);\r
}\r
if (cands != null)\r
{\r
- rseqs.addElement(nxt);\r
- boolean foundmap= cf!=null; // don't search if we aren't given a codon map object\r
- for (int r=0; foundmap && r<cands.length; r++)\r
+ if (!rseqs.contains(nxt))\r
{\r
- if (cands[r].hasMap())\r
+ rseqs.addElement(nxt);\r
+ boolean foundmap = cf != null; // don't search if we aren't given\r
+ // a codon map object\r
+ for (int r = 0; foundmap && r < cands.length; r++)\r
{\r
- if (cands[r].getMap().getTo()!=null && cands[r].getMap().getMap().getFromRatio()!=cands[r].getMap().getMap().getToRatio())\r
+ if (cands[r].hasMap())\r
{\r
- foundmap=true;\r
- // get sense of map correct for adding to product alignment.\r
- if (dna)\r
+ if (cands[r].getMap().getTo() != null\r
+ && cands[r].getMap().getMap().getFromRatio() != cands[r]\r
+ .getMap().getMap().getToRatio())\r
{\r
- // map is from dna seq to a protein product\r
- cf.addMap(sequenceI, nxt, cands[r].getMap().getMap()); \r
- } else {\r
- // map should be from protein seq to its coding dna\r
- cf.addMap(nxt, sequenceI, cands[r].getMap().getMap().getInverse());\r
+ foundmap = true;\r
+ // get sense of map correct for adding to product alignment.\r
+ if (dna)\r
+ {\r
+ // map is from dna seq to a protein product\r
+ cf.addMap(sequenceI, nxt, cands[r].getMap().getMap());\r
+ }\r
+ else\r
+ {\r
+ // map should be from protein seq to its coding dna\r
+ cf.addMap(nxt, sequenceI, cands[r].getMap().getMap()\r
+ .getInverse());\r
+ }\r
}\r
}\r
}\r
+ // TODO: add mapping between sequences if necessary\r
+ found = true;\r
}\r
- // TODO: add mapping between sequences if necessary\r
- found = true;\r
}\r
- \r
+\r
}\r
}\r
}\r
}\r
\r
/**\r
- * precalculate different products that can be found for seqs in dataset\r
- * and return them.\r
+ * precalculate different products that can be found for seqs in dataset and\r
+ * return them.\r
+ * \r
* @param dna\r
* @param seqs\r
* @param dataset\r
- * @param fake - don't actually build lists - just get types\r
- * @return\r
- public static Object[] buildXProductsList(boolean dna, SequenceI[] seqs, AlignmentI dataset, boolean fake)\r
- {\r
- String types[] = jalview.analysis.CrossRef.findSequenceXrefTypes(\r
- dna, seqs, dataset);\r
- if (types != null)\r
- {\r
- System.out.println("Xref Types for: "+(dna ? "dna" : "prot"));\r
- for (int t = 0; t < types.length; t++)\r
- {\r
- System.out.println("Type: " + types[t]);\r
- SequenceI[] prod = \r
- jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);\r
- System.out.println("Found "\r
- + ((prod == null) ? "no" : "" + prod.length)\r
- + " products");\r
- if (prod!=null)\r
- {\r
- for (int p=0; p<prod.length; p++)\r
- {\r
- System.out.println("Prod "+p+": "+prod[p].getDisplayId(true));\r
- }\r
- }\r
- }\r
-\r
- } else {\r
- System.out.println("Trying getProducts for "+al.getSequenceAt(0).getDisplayId(true));\r
- System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));\r
- // have a bash at finding the products amongst all the retrieved sequences.\r
- SequenceI[] prod = jalview.analysis.CrossRef.findXrefSequences(al\r
- .getSequencesArray(), dna, null, ds);\r
- System.out.println("Found "\r
- + ((prod == null) ? "no" : "" + prod.length)\r
- + " products");\r
- if (prod!=null)\r
- {\r
- // select non-equivalent sequences from dataset list\r
- for (int p=0; p<prod.length; p++)\r
- {\r
- System.out.println("Prod "+p+": "+prod[p].getDisplayId(true));\r
- }\r
- }\r
-\r
- }\r
- }\r
+ * @param fake -\r
+ * don't actually build lists - just get types\r
+ * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]\r
+ * seqs, AlignmentI dataset, boolean fake) { String types[] =\r
+ * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,\r
+ * dataset); if (types != null) { System.out.println("Xref Types for:\r
+ * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {\r
+ * System.out.println("Type: " + types[t]); SequenceI[] prod =\r
+ * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);\r
+ * System.out.println("Found " + ((prod == null) ? "no" : "" +\r
+ * prod.length) + " products"); if (prod!=null) { for (int p=0; p<prod.length;\r
+ * p++) { System.out.println("Prod "+p+":\r
+ * "+prod[p].getDisplayId(true)); } } }\r
+ * } else { System.out.println("Trying getProducts for\r
+ * "+al.getSequenceAt(0).getDisplayId(true)); System.out.println("Search DS\r
+ * Xref for: "+(dna ? "dna" : "prot")); // have a bash at finding the products\r
+ * amongst all the retrieved sequences. SequenceI[] prod =\r
+ * jalview.analysis.CrossRef.findXrefSequences(al .getSequencesArray(), dna,\r
+ * null, ds); System.out.println("Found " + ((prod == null) ? "no" : "" +\r
+ * prod.length) + " products"); if (prod!=null) { // select non-equivalent\r
+ * sequences from dataset list for (int p=0; p<prod.length; p++) {\r
+ * System.out.println("Prod "+p+": "+prod[p].getDisplayId(true)); } }\r
+ * } }\r
*/\r
}
\ No newline at end of file