From 60fc6f2cf9816f04e060b233400edb9ee59c4f92 Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Thu, 22 Jun 2006 08:49:34 +0000 Subject: [PATCH] Renamed to DBRefFetcher --- src/jalview/io/SequenceFeatureFetcher.java | 363 ---------------------------- 1 file changed, 363 deletions(-) delete mode 100755 src/jalview/io/SequenceFeatureFetcher.java diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java deleted file mode 100755 index edbf862..0000000 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ /dev/null @@ -1,363 +0,0 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.io; - -import jalview.datamodel.*; - -import jalview.gui.*; - -import java.io.*; - -import java.util.*; - -import org.exolab.castor.mapping.Mapping; - -import org.exolab.castor.xml.*; -import jalview.analysis.AlignSeq; - - - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -public class SequenceFeatureFetcher implements Runnable -{ - - AlignmentI align; - AlignmentI dataset; - AlignmentPanel ap; - ArrayList unknownSequences; - CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - boolean uniprotFlag = false; - - public SequenceFeatureFetcher() - {} - - public Vector getUniprotEntries(File file) - { - - UniprotFile uni = new UniprotFile(); - try - { - // 1. Load the mapping information from the file - Mapping map = new Mapping(uni.getClass().getClassLoader()); - java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); - map.loadMapping(url); - - // 2. Unmarshal the data - Unmarshaller unmar = new Unmarshaller(uni); - unmar.setIgnoreExtraElements(true); - unmar.setMapping(map); - // unmar.setDebug(true); - - uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); - } - catch (Exception e) - { - System.out.println("Error getUniprotEntries() "+e); - } - - - return uni.getUniprotEntries(); - } - - /** - * Creates a new SequenceFeatureFetcher object. - * - * @param align DOCUMENT ME! - * @param ap DOCUMENT ME! - */ - public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) - { - unknownSequences = new ArrayList(); - this.align = align; - this.dataset = align.getDataset(); - this.ap = ap; - - Thread thread = new Thread(this); - thread.start(); - } - - /** - * DOCUMENT ME! - */ - public void run() - { - try - { - int seqIndex = 0; - Vector sequences = dataset.getSequences(); - - while (seqIndex < sequences.size()) - { - Vector ids = new Vector(); - - for (int i = 0; (seqIndex < sequences.size()) && (i < 50); - seqIndex++, i++) - { - Sequence sequence = (Sequence) sequences.get(seqIndex); - Vector uprefs = jalview.util.DBRefUtils.selectRefs(sequence.getDBRef(), new String[] { - jalview.datamodel.DBRefSource.UNIPROT}); - if (uprefs!=null) - { - // we know the id for this entry, so don't note its ID in the unknownSequences list - for (int j=0,k=uprefs.size(); j 0) - { - StringBuffer remainingIds = new StringBuffer("uniprot:"); - for (int i = 0; i < ids.size(); i++) - { - if(ids.get(i).toString().indexOf("|")>-1) - { - remainingIds.append(ids.get(i).toString().substring( - ids.get(i).toString().lastIndexOf("|") + 1)); - uniprotFlag = true; - } - remainingIds.append(ids.get(i) + ";"); - } - EBIFetchClient ebi = new EBIFetchClient(); - File file = ebi.fetchDataAsFile(remainingIds.toString(), - "xml", "raw"); - - - - if (file != null) - { - ReadUniprotFile(file, ids); - } - } - } - } - catch (Exception ex) - { - ex.printStackTrace(); - } - - if (sbuffer.length() > 0) - { - output.setText( - "Your sequences have been matched to Uniprot. Some of the ids have been\n" + - "altered, most likely the start/end residue will have been updated.\n" + - "Save your alignment to maintain the updated id.\n\n" + - sbuffer.toString()); - Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); - // The above is the dataset, we must now find out the index - // of the viewed sequence - - } - - promptBeforeBlast(); - - } - - - void promptBeforeBlast() - { - // This must be outside the run() body as java 1.5 - // will not return any value from the OptionPane to the expired thread. - if (unknownSequences.size() > 0) - { - // int reply = javax.swing.JOptionPane.showConfirmDialog( - // Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences." - // +"\nPerform blast for unknown sequences?", - // "Blast for Unidentified Sequences", - // javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE); - javax.swing.JOptionPane.showMessageDialog( - Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences.", - "Unidentified Sequences", - javax.swing.JOptionPane.WARNING_MESSAGE); - - - // if(reply == javax.swing.JOptionPane.YES_OPTION) - // new WSWUBlastClient(ap, align, unknownSequences); - } - - - ap.repaint(); - } - - /** - * DOCUMENT ME! - * - * @param result DOCUMENT ME! - * @param out DOCUMENT ME! - * @param align DOCUMENT ME! - */ - void ReadUniprotFile(File file, Vector ids) - { - if(!file.exists()) - return; - - SequenceI sequence = null; - - Vector entries = getUniprotEntries(file); - - int i, iSize = entries==null?0:entries.size(); - UniprotEntry entry; - for (i = 0; i < iSize; i++) - { - entry = (UniprotEntry) entries.elementAt(i); - String idmatch = entry.getAccession().elementAt(0).toString(); - sequence = dataset.findName(idmatch); - - if (sequence == null) - { - //Sequence maybe Name, not Accession - idmatch = entry.getName().elementAt(0).toString(); - sequence = dataset.findName(idmatch); - } - - if(sequence!=null) - ids.remove(sequence.getName()); - - else if (sequence == null && uniprotFlag) - { - sequence = dataset.findName("UniProt/Swiss-Prot|"+entry.getAccession().elementAt(0)+"|"+idmatch); - ids.remove(idmatch); - } - - if(sequence ==null) - { - System.out.println(idmatch+" not found"); - continue; - } - - - String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); - - int absStart = entry.getUniprotSequence().getContent().indexOf( - nonGapped.toString()); - - if (absStart == -1) - { - // Is UniprotSequence contained in dataset sequence? - absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent()); - if(absStart == -1) - { - sbuffer.append(sequence.getName() + - " SEQUENCE NOT %100 MATCH \n"); - - continue; - } - else - { - - if(entry.getFeature()!=null) - { - Enumeration e = entry.getFeature().elements(); - while (e.hasMoreElements()) - { - SequenceFeature sf = (SequenceFeature) e.nextElement(); - sf.setBegin(sf.getBegin() + absStart + 1); - sf.setEnd(sf.getEnd() + absStart + 1); - } - } - - sbuffer.append(sequence.getName() + - " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES" - +" HAVE BEEN ADJUSTED ACCORDINGLY \n"); - absStart = 0; - } - - } - - unknownSequences.remove(sequence); - - int absEnd = absStart + nonGapped.toString().length(); - absStart += 1; - - Enumeration e = entry.getDbReference().elements(); - Vector onlyPdbEntries = new Vector(); - while(e.hasMoreElements()) - { - PDBEntry pdb = (PDBEntry)e.nextElement(); - if(!pdb.getType().equals("PDB")) - continue; - - onlyPdbEntries.addElement(pdb); - } - - sequence.setPDBId(onlyPdbEntries); - if (entry.getFeature()!=null) { - e = entry.getFeature().elements(); - while (e.hasMoreElements()) - { - SequenceFeature sf = (SequenceFeature) e.nextElement(); - sf.setFeatureGroup("Uniprot"); - sequence.addSequenceFeature( sf ); - } - } - sequence.setStart(absStart); - sequence.setEnd(absEnd); - - - int n = 0; - SequenceI seq2; - while (n < align.getHeight()) - { - //This loop enables multiple sequences with the same - //id to have features added and seq limits updated - seq2 = align.getSequenceAt(n); - if (seq2.getName().equals(idmatch)) - { - - nonGapped = AlignSeq.extractGaps("-. ", seq2.getSequence()); - - absStart = sequence.getSequence().indexOf(nonGapped); - absEnd = absStart + nonGapped.toString().length() - 1; - - // This is the Viewd alignment sequences - // No need to tell the user of the dataset updates - if ( (seq2.getStart() != absStart+sequence.getStart()) - || (seq2.getEnd() != absEnd+sequence.getStart())) - { - sbuffer.append("Updated: " + seq2.getName() + " " + - seq2.getStart() + "/" + seq2.getEnd() + - " to " + (absStart + sequence.getStart()) + "/" + - (absEnd + sequence.getStart()) + "\n"); - - seq2.setStart(absStart + sequence.getStart()); - seq2.setEnd(absEnd + sequence.getStart()); - } - } - - n++; - } - } - } -} - - -- 1.7.10.2