From 006226fa282af3eed4c9770a20073b0f4fa990d4 Mon Sep 17 00:00:00 2001 From: jprocter Date: Fri, 7 Sep 2007 16:58:45 +0000 Subject: [PATCH] formatting and ensure that dataset is actually searched for sequences with dbrefs that correspond to a dbref crossreference sequence being retrieved (still outstanding bug for embl->proteinprod->emblcds retrieval --- src/jalview/analysis/CrossRef.java | 259 +++++++++++++++++++----------------- 1 file changed, 138 insertions(+), 121 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index f475ecb..d2f0358 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -39,27 +39,29 @@ public class CrossRef else { rfs = jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DNACODINGDBS); // could attempt to find other cross refs and return here - ie PDB xrefs (not dna, not protein seq) + DBRefSource.DNACODINGDBS); // could attempt to find other cross + // refs and return here - ie PDB xrefs + // (not dna, not protein seq) } return rfs; } - public static Hashtable classifyDbRefs(DBRefEntry[] rfs) { Hashtable classes = new Hashtable(); - classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs(rfs, DBRefSource.PROTEINDBS)); - classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DNACODINGDBS)); - classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs(rfs, - DBRefSource.DOMAINDBS)); + classes.put(DBRefSource.PROTEINDBS, jalview.util.DBRefUtils.selectRefs( + rfs, DBRefSource.PROTEINDBS)); + classes.put(DBRefSource.DNACODINGDBS, jalview.util.DBRefUtils + .selectRefs(rfs, DBRefSource.DNACODINGDBS)); + classes.put(DBRefSource.DOMAINDBS, jalview.util.DBRefUtils.selectRefs( + rfs, DBRefSource.DOMAINDBS)); // classes.put(OTHER, ) return classes; } /** * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ @@ -67,22 +69,25 @@ public class CrossRef { return findSequenceXrefTypes(dna, seqs, null); } + /** - * Indirect references are references from other sequences from the dataset to any of the direct - * DBRefEntrys on the given sequences. + * Indirect references are references from other sequences from the dataset to + * any of the direct DBRefEntrys on the given sequences. + * * @param dna - * true if seqs are DNA seqs + * true if seqs are DNA seqs * @param seqs * @return a list of sequence database cross reference source types */ - public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs, AlignmentI dataset) + public static String[] findSequenceXrefTypes(boolean dna, + SequenceI[] seqs, AlignmentI dataset) { String[] dbrefs = null; Vector refs = new Vector(); for (int s = 0; s < seqs.length; s++) { SequenceI dss = seqs[s]; - while (dss.getDatasetSequence()!=null) + while (dss.getDatasetSequence() != null) { dss = dss.getDatasetSequence(); } @@ -94,18 +99,19 @@ public class CrossRef refs.addElement(rfs[r].getSource()); } } - if (dataset!=null) + if (dataset != null) { // search for references to this sequence's direct references. DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); Vector rseqs = new Vector(); - CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, null); // don't need to specify codon frame for mapping here + CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, + null); // don't need to specify codon frame for mapping here Enumeration lr = rseqs.elements(); while (lr.hasMoreElements()) { SequenceI rs = (SequenceI) lr.nextElement(); DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); - for (int r=0; rfs != null && r < rfs.length; r++) + for (int r = 0; rfs != null && r < rfs.length; r++) { if (!refs.contains(rfs[r].getSource())) { @@ -188,7 +194,7 @@ public class CrossRef * @param dna * @param source * @param dataset - * alignment to search for product sequences. + * alignment to search for product sequences. * @return products (as dataset sequences) */ public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna, @@ -196,24 +202,32 @@ public class CrossRef { Vector rseqs = new Vector(); Alignment ral = null; - AlignedCodonFrame cf=new AlignedCodonFrame(0); // nominal width + AlignedCodonFrame cf = new AlignedCodonFrame(0); // nominal width for (int s = 0; s < seqs.length; s++) { SequenceI dss = seqs[s]; - while (dss.getDatasetSequence()!=null) + while (dss.getDatasetSequence() != null) { dss = dss.getDatasetSequence(); } boolean found = false; DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRef()); - if ((xrfs == null || xrfs.length == 0) && dataset!=null) + if ((xrfs == null || xrfs.length == 0) && dataset != null) { System.out.println("Attempting to find ds Xrefs refs."); - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less ambiguous would be a 'find primary dbRefEntry' method. + DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRef()); // less + // ambiguous + // would + // be a + // 'find + // primary + // dbRefEntry' + // method. // filter for desired source xref here - found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, rseqs, cf); + found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, + rseqs, cf); } - for (int r = 0; xrfs!=null && r < xrfs.length; r++) + for (int r = 0; xrfs != null && r < xrfs.length; r++) { if (source != null && !source.equals(xrfs[r].getSource())) continue; @@ -223,14 +237,17 @@ public class CrossRef { Sequence rsq = new Sequence(xrfs[r].getMap().getTo()); rseqs.addElement(rsq); - if (xrfs[r].getMap().getMap().getFromRatio()!=xrfs[r].getMap().getMap().getToRatio()) + if (xrfs[r].getMap().getMap().getFromRatio() != xrfs[r] + .getMap().getMap().getToRatio()) { // get sense of map correct for adding to product alignment. if (dna) { // map is from dna seq to a protein product cf.addMap(dss, rsq, xrfs[r].getMap().getMap()); - } else { + } + else + { // map should be from protein seq to its coding dna cf.addMap(rsq, dss, xrfs[r].getMap().getMap().getInverse()); } @@ -238,13 +255,13 @@ public class CrossRef found = true; } } - else + if (!found) { // do a bit more work - search for sequences with references matching // xrefs on this sequence. if (dataset != null) { - found = searchDataset(dss, xrfs[r], dataset, rseqs, cf); + found |= searchDataset(dss, xrfs[r], dataset, rseqs, cf); if (found) xrfs[r] = null; // we've recovered seqs for this one. } @@ -265,8 +282,10 @@ public class CrossRef for (int r = 0; r < xrfs.length; r++) { // filter out any irrelevant or irretrievable references - if (xrfs[r]==null || ((source != null && !source.equals(xrfs[r].getSource())) - || !sftch.isFetchable(xrfs[r].getSource()))) + if (xrfs[r] == null + || ((source != null && !source.equals(xrfs[r] + .getSource())) || !sftch.isFetchable(xrfs[r] + .getSource()))) { l--; xrfs[r] = null; @@ -275,7 +294,7 @@ public class CrossRef if (l > 0) { System.out - .println("Attempting to retrieve cross referenced sequences."); + .println("Attempting to retrieve cross referenced sequences."); DBRefEntry[] t = new DBRefEntry[l]; l = 0; for (int r = 0; r < xrfs.length; r++) @@ -290,8 +309,8 @@ public class CrossRef } catch (Exception e) { System.err - .println("Problem whilst retrieving cross references for Sequence : " - + seqs[s].getName()); + .println("Problem whilst retrieving cross references for Sequence : " + + seqs[s].getName()); e.printStackTrace(); } if (retrieved != null) @@ -310,7 +329,7 @@ public class CrossRef SequenceI[] rsqs = new SequenceI[rseqs.size()]; rseqs.copyInto(rsqs); ral = new Alignment(rsqs); - if (cf!=null && cf.getProtMappings()!=null) + if (cf != null && cf.getProtMappings() != null) { ral.addCodonFrame(cf); } @@ -319,20 +338,24 @@ public class CrossRef } /** - * find references to lrfs in the cross-reference set of each sequence in dataset (that is not equal to sequenceI) - * Identifies matching DBRefEntry based on source and accession string only - Map and Version are nulled. + * find references to lrfs in the cross-reference set of each sequence in + * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry + * based on source and accession string only - Map and Version are nulled. + * * @param sequenceI * @param lrfs * @param dataset * @param rseqs * @return true if matches were found. */ - private static boolean searchDatasetXrefs(SequenceI sequenceI, boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, AlignedCodonFrame cf) + private static boolean searchDatasetXrefs(SequenceI sequenceI, + boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, Vector rseqs, + AlignedCodonFrame cf) { - boolean found=false; - if (lrfs==null) + boolean found = false; + if (lrfs == null) return false; - for (int i=0;i