X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FDBRefFetcher.java;h=bd3c73323de37914f913ad59e54ff34fb550b361;hb=dd74fc4938723fe5ec48d4e5fdcfbe58ac42a48d;hp=c11910b2c69e010a2e7492875af681fbe57a38d1;hpb=8e728f4ac9c2dc718f97db2e34fc3571a9622f94;p=jalview.git diff --git a/src/jalview/io/DBRefFetcher.java b/src/jalview/io/DBRefFetcher.java index c11910b..bd3c733 100644 --- a/src/jalview/io/DBRefFetcher.java +++ b/src/jalview/io/DBRefFetcher.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -41,13 +41,15 @@ import jalview.analysis.AlignSeq; */ public class DBRefFetcher implements Runnable { - AlignmentI align; AlignmentI dataset; AlignFrame af; - ArrayList unknownSequences; CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); - boolean uniprotFlag = false; + boolean running = false; + + ///This will be a collection of Vectors of sequenceI refs. + //The key will be the seq name or accession id of the seq + Hashtable seqRefs; public DBRefFetcher() {} @@ -66,7 +68,6 @@ public class DBRefFetcher implements Runnable Unmarshaller unmar = new Unmarshaller(uni); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); - // unmar.setDebug(true); uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); } @@ -88,14 +89,64 @@ public class DBRefFetcher implements Runnable public DBRefFetcher(AlignmentI align, AlignFrame af) { this.af = af; - unknownSequences = new ArrayList(); - this.align = align; this.dataset = align.getDataset(); + } + public boolean fetchDBRefs(boolean waitTillFinished) + { Thread thread = new Thread(this); thread.start(); + running = true; + + if(waitTillFinished) + { + while(running) + { + try{ + Thread.sleep(500); + }catch(Exception ex){} + } + } + + return true; + } + + /** + * The sequence will be added to a vector of sequences + * belonging to key which could be either seq name or dbref id + * @param seq SequenceI + * @param key String + */ + void addSeqId(SequenceI seq, String key) + { + key = key.toUpperCase(); + + Vector seqs; + if(seqRefs.containsKey(key)) + { + seqs = (Vector)seqRefs.get(key); + + if(seqs!=null && !seqs.contains(seq)) + { + seqs.addElement(seq); + } + else if(seqs==null) + { + seqs = new Vector(); + seqs.addElement(seq); + } + + } + else + { + seqs = new Vector(); + seqs.addElement(seq); + } + + seqRefs.put(key, seqs); } + /** * DOCUMENT ME! */ @@ -103,6 +154,9 @@ public class DBRefFetcher implements Runnable { long startTime = System.currentTimeMillis(); af.setProgressBar("Fetching db refs", startTime); + running = true; + + seqRefs = new Hashtable(); try { @@ -111,58 +165,52 @@ public class DBRefFetcher implements Runnable while (seqIndex < sequences.size()) { - Vector ids = new Vector(); + StringBuffer queryString = new StringBuffer("uniprot:"); for (int i = 0; (seqIndex < sequences.size()) && (i < 50); seqIndex++, i++) { Sequence sequence = (Sequence) sequences.get(seqIndex); - Vector uprefs = jalview.util.DBRefUtils.selectRefs(sequence.getDBRef(), new String[] { + DBRefEntry [] uprefs = jalview.util.DBRefUtils.selectRefs(sequence.getDBRef(), new String[] { jalview.datamodel.DBRefSource.UNIPROT}); if (uprefs!=null) { - // we know the id for this entry, so don't note its ID in the unknownSequences list - for (int j=0,k=uprefs.size(); j50) + break; + + for(int j=0; j50) + { + //Dont send more than 50 id strings to dbFetch!! + seqIndex --; + } + else + { + while(st.hasMoreTokens()) + { + String token = st.nextToken(); + addSeqId(sequence, token); + queryString.append(token+";"); + } } } } /////////////////////////////////// ///READ FROM EBI - if (ids.size() > 0) + EBIFetchClient ebi = new EBIFetchClient(); + File file = ebi.fetchDataAsFile(queryString.toString(), "xml", "raw"); + if (file != null) { - StringBuffer remainingIds = new StringBuffer("uniprot:"); - for (int i = 0; i < ids.size(); i++) - { - if(ids.get(i).toString().indexOf("|")>-1) - { - remainingIds.append(ids.get(i).toString().substring( - ids.get(i).toString().lastIndexOf("|") + 1)); - uniprotFlag = true; - } - else - remainingIds.append(ids.get(i)); - - remainingIds.append(";"); - } - - EBIFetchClient ebi = new EBIFetchClient(); - File file = ebi.fetchDataAsFile(remainingIds.toString(), - "xml", "raw"); - - - - if (file != null) - { - ReadUniprotFile(file, ids); - } + ReadUniprotFile(file); } } } @@ -187,31 +235,11 @@ public class DBRefFetcher implements Runnable af.setProgressBar("DBRef search completed", startTime); // promptBeforeBlast(); - } - + running = false; - void promptBeforeBlast() - { - // This must be outside the run() body as java 1.5 - // will not return any value from the OptionPane to the expired thread. - if (unknownSequences.size() > 0) - { - // int reply = javax.swing.JOptionPane.showConfirmDialog( - // Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences." - // +"\nPerform blast for unknown sequences?", - // "Blast for Unidentified Sequences", - // javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE); - javax.swing.JOptionPane.showMessageDialog( - Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences.", - "Unidentified Sequences", - javax.swing.JOptionPane.WARNING_MESSAGE); - - - // if(reply == javax.swing.JOptionPane.YES_OPTION) - // new WSWUBlastClient(ap, align, unknownSequences); - } } + /** * DOCUMENT ME! * @@ -219,77 +247,83 @@ public class DBRefFetcher implements Runnable * @param out DOCUMENT ME! * @param align DOCUMENT ME! */ - void ReadUniprotFile(File file, Vector ids) + void ReadUniprotFile(File file) { - if(!file.exists()) + if (!file.exists()) return; SequenceI sequence = null; Vector entries = getUniprotEntries(file); - int i, iSize = entries==null?0:entries.size(); + int i, iSize = entries == null ? 0 : entries.size(); UniprotEntry entry; for (i = 0; i < iSize; i++) { entry = (UniprotEntry) entries.elementAt(i); - String idmatch = entry.getAccession().elementAt(0).toString(); - sequence = dataset.findName(idmatch); - if (sequence == null) + //Work out which sequences this Uniprot file has matches to, + //taking into account all accessionIds and names in the file + Vector sequenceMatches = new Vector(); + for (int j = 0; j < entry.getAccession().size(); j++) { - //Sequence maybe Name, not Accession - idmatch = entry.getName().elementAt(0).toString(); - sequence = dataset.findName(idmatch); + String accessionId = entry.getAccession().elementAt(j).toString(); + if (seqRefs.containsKey(accessionId)) + { + Vector seqs = (Vector) seqRefs.get(accessionId); + for (int jj = 0; jj < seqs.size(); jj++) + { + sequence = (SequenceI) seqs.elementAt(jj); + if (!sequenceMatches.contains(sequence)) + sequenceMatches.addElement(sequence); + } + } } - - if(sequence!=null) - ids.remove(sequence.getName()); - - else if (sequence == null && uniprotFlag) + for (int j = 0; j < entry.getName().size(); j++) { - StringBuffer upid = new StringBuffer("UniProt/Swiss-Prot|"); - for(int u=0; u