/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.io; import jalview.datamodel.*; import jalview.gui.*; import java.io.*; import java.util.*; import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.*; import jalview.analysis.AlignSeq; /** * Looks up UniProt entries for the sequences of an alignment dataset through * the EBI fetch client. Implements Runnable so that the lookup can be started * on its own thread by fetchDBRefs. When the shared sbuffer is not empty at * the end of run(), a text window headed Sequence names updated is opened. * * @author $author$ * @version $Revision$ */ public class DBRefFetcher implements Runnable { AlignmentI align; AlignmentI dataset; AlignFrame af; ArrayList unknownSequences; CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); boolean uniprotFlag = false; boolean running = false; public DBRefFetcher() {} /** * Unmarshals a UniProt XML file into a UniprotFile with the Castor * Unmarshaller, using the mapping loaded from the classpath resource * uniprot_mapping.xml. Extra XML elements are ignored. Any exception is * printed to standard output, after which the entries of the UniprotFile * created at the top of the method are returned. * NOTE(review): the FileReader handed to unmarshal is never closed in this * method - confirm whether the unmarshaller closes it. * * @param file XML file to read * @return the value of UniprotFile.getUniprotEntries() */ public Vector getUniprotEntries(File file) { UniprotFile uni = new UniprotFile(); try { // 1. Load the mapping information from the file Mapping map = new Mapping(uni.getClass().getClassLoader()); java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); map.loadMapping(url); // 2. 
Unmarshal the data Unmarshaller unmar = new Unmarshaller(uni); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); // unmar.setDebug(true); uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); } catch (Exception e) { System.out.println("Error getUniprotEntries() "+e); } return uni.getUniprotEntries(); } /** * Creates a new DBRefFetcher for the given alignment. Stores the frame, * creates an empty unknownSequences list and takes the dataset from * align.getDataset(). * * @param align alignment whose dataset is searched by run() * @param af frame whose progress bar is updated by run() */ public DBRefFetcher(AlignmentI align, AlignFrame af) { this.af = af; unknownSequences = new ArrayList(); this.align = align; this.dataset = align.getDataset(); } /** * Starts run() on a new thread. When waitTillFinished is true the calling * thread then sleeps in 500 ms steps until the running flag is false. * Exceptions thrown by the sleep call are ignored. * NOTE(review): running is set to true only after thread.start() and the * field is not volatile - confirm that the wait loop cannot miss the end * of run(). * * @param waitTillFinished true to block until the running flag is cleared * @return always true */ public boolean fetchDBRefs(boolean waitTillFinished) { Thread thread = new Thread(this); thread.start(); running = true; if(waitTillFinished) { while(running) { try{ Thread.sleep(500); }catch(Exception ex){} } } return true; } /** * Walks the dataset sequences in groups of at most 50 sequences and selects * the UNIPROT references of each one. The collected ids are sent to the EBI * fetch client as one query that starts with uniprot: and separates the ids * with semicolons; for an id containing a | character only the text after * the last | is sent and uniprotFlag is set. Each file that comes back is * passed to ReadUniprotFile together with the id list. Exceptions are * reported with printStackTrace. Afterwards, if sbuffer is not empty, the * output window is filled and shown. The progress bar of the frame is * updated at the start and at the end, and the running flag is set on entry * and cleared on exit. * NOTE(review): part of the id collection loop is missing from this copy of * the file, so how the id list and unknownSequences are filled cannot be * confirmed from here. */ public void run() { long startTime = System.currentTimeMillis(); af.setProgressBar("Fetching db refs", startTime); running = true; try { int seqIndex = 0; Vector sequences = dataset.getSequences(); while (seqIndex < sequences.size()) { Vector ids = new Vector(); for (int i = 0; (seqIndex < sequences.size()) && (i < 50); seqIndex++, i++) { Sequence sequence = (Sequence) sequences.get(seqIndex); DBRefEntry [] uprefs = jalview.util.DBRefUtils.selectRefs(sequence.getDBRef(), new String[] { jalview.datamodel.DBRefSource.UNIPROT}); if (uprefs!=null) { // we know the id for this entry, so don't note its ID in the unknownSequences list for (int j=0,k=uprefs.length; j 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) { if(ids.get(i).toString().indexOf("|")>-1) { remainingIds.append(ids.get(i).toString().substring( ids.get(i).toString().lastIndexOf("|") + 1)); uniprotFlag = true; } else remainingIds.append(ids.get(i)); remainingIds.append(";"); } EBIFetchClient ebi = new EBIFetchClient(); File file = ebi.fetchDataAsFile(remainingIds.toString(), "xml", "raw"); if (file != null) { ReadUniprotFile(file, 
ids); } } } } catch (Exception ex) { ex.printStackTrace(); } if (sbuffer.length() > 0) { output.setText( "Your sequences have been matched to Uniprot. Some of the ids have been\n" + "altered, most likely the start/end residue will have been updated.\n" + "Save your alignment to maintain the updated id.\n\n" + sbuffer.toString()); Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); // The above is the dataset, we must now find out the index // of the viewed sequence } af.setProgressBar("DBRef search completed", startTime); // promptBeforeBlast(); running = false; } /** * Shows a warning dialog that gives the number of entries in * unknownSequences, if the list is not empty. The confirm dialog and the * blast client call that used to follow are commented out, and so is the * call to this method at the end of run(). */ void promptBeforeBlast() { // This must be outside the run() body as java 1.5 // will not return any value from the OptionPane to the expired thread. if (unknownSequences.size() > 0) { // int reply = javax.swing.JOptionPane.showConfirmDialog( // Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences." // +"\nPerform blast for unknown sequences?", // "Blast for Unidentified Sequences", // javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE); javax.swing.JOptionPane.showMessageDialog( Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences.", "Unidentified Sequences", javax.swing.JOptionPane.WARNING_MESSAGE); // if(reply == javax.swing.JOptionPane.YES_OPTION) // new WSWUBlastClient(ap, align, unknownSequences); } } /** * DOCUMENT ME! * * @param result DOCUMENT ME! * @param out DOCUMENT ME! * @param align DOCUMENT ME! 
*/ void ReadUniprotFile(File file, Vector ids) { if(!file.exists()) return; SequenceI [] sequence = null; Vector entries = getUniprotEntries(file); int i, iSize = entries==null?0:entries.size(); UniprotEntry entry; for (i = 0; i < iSize; i++) { entry = (UniprotEntry) entries.elementAt(i); String idmatch = entry.getAccession().elementAt(0).toString(); sequence = dataset.findSequenceMatch(idmatch); if (sequence.length==0) { //Sequence maybe Name, not Accession idmatch = entry.getName().elementAt(0).toString(); sequence = dataset.findSequenceMatch(idmatch); } if(sequence.length>0) ids.remove(sequence[0].getName()); else if (sequence.length==0 && uniprotFlag) { StringBuffer upid = new StringBuffer("UniProt/Swiss-Prot|"); for(int u=0; u