/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.io; import jalview.datamodel.*; import jalview.gui.*; import java.io.*; import java.util.*; import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.*; import jalview.analysis.AlignSeq; /** * DOCUMENT ME! * * @author $author$ * @version $Revision$ */ public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentI dataset; AlignmentPanel ap; ArrayList unknownSequences; CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); public SequenceFeatureFetcher() {} public Vector getUniprotEntries(File file) { UniprotFile uni = new UniprotFile(); try { // 1. Load the mapping information from the file Mapping map = new Mapping(uni.getClass().getClassLoader()); java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); map.loadMapping(url); // 2. Unmarshal the data Unmarshaller unmar = new Unmarshaller(); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); } catch (Exception e) { System.out.println("Error getUniprotEntries() "+e); } return uni.getUniprotEntries(); } /** * Creates a new SequenceFeatureFetcher object. * * @param align DOCUMENT ME! * @param ap DOCUMENT ME! */ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { unknownSequences = new ArrayList(); this.align = align; this.dataset = align.getDataset(); this.ap = ap; Thread thread = new Thread(this); thread.start(); } /** * DOCUMENT ME! */ public void run() { try { int seqIndex = 0; Vector sequences = dataset.getSequences(); while (seqIndex < sequences.size()) { Vector ids = new Vector(); for (int i = 0; (seqIndex < sequences.size()) && (i < 50); seqIndex++, i++) { SequenceI sequence = (SequenceI) sequences.get(seqIndex); if(sequence.getSequenceFeatures()==null) { if (!ids.contains(sequence.getName())) { ids.add(sequence.getName()); unknownSequences.add(sequence); } } } /////////////////////////////////// ///READ FROM EBI if (ids.size() > 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) { remainingIds.append(ids.get(i) + ";"); } EBIFetchClient ebi = new EBIFetchClient(); File file = ebi.fetchDataAsFile(remainingIds.toString(), "xml", "raw"); if (file != null) { ReadUniprotFile(file, ids); } } } } catch (Exception ex) { ex.printStackTrace(); } if (sbuffer.length() > 0) { output.setText( "Your sequences have been matched to Uniprot. Some of the ids have been\n" + "altered, most likely the start/end residue will have been updated.\n" + "Save your alignment to maintain the updated id.\n\n" + sbuffer.toString()); Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); // The above is the dataset, we must now find out the index // of the viewed sequence } promptBeforeBlast(); } void promptBeforeBlast() { // This must be outside the run() body as java 1.5 // will not return any value from the OptionPane to the expired thread. if (unknownSequences.size() > 0) { int reply = javax.swing.JOptionPane.showConfirmDialog( Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences." +"\nPerform blast for unknown sequences?", "Blast for Unidentified Sequences", javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE); if(reply == javax.swing.JOptionPane.YES_OPTION) new WSWUBlastClient(ap, align, unknownSequences); } ap.repaint(); } /** * DOCUMENT ME! * * @param result DOCUMENT ME! * @param out DOCUMENT ME! * @param align DOCUMENT ME! */ void ReadUniprotFile(File file, Vector ids) { if(!file.exists()) return; SequenceI sequence = null; Vector entries = getUniprotEntries(file); int i, iSize = entries==null?0:entries.size(); UniprotEntry entry; for (i = 0; i < iSize; i++) { entry = (UniprotEntry) entries.elementAt(i); String idmatch = entry.getAccession().elementAt(0).toString(); sequence = dataset.findName(idmatch); if (sequence == null) { //Sequence maybe Name, not Accession idmatch = entry.getName().elementAt(0).toString(); sequence = dataset.findName(idmatch); } if (sequence == null) { System.out.println(idmatch+" not found"); continue; } ids.remove(sequence.getName()); unknownSequences.remove(sequence); String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); int absStart = entry.getUniprotSequence().getContent().indexOf( nonGapped.toString()); if (absStart == -1) { // Is UniprotSequence contained in dataset sequence? absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent()); if(absStart == -1) { unknownSequences.add(sequence.getName()); sbuffer.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n"); continue; } else { if(entry.getFeature()!=null) { Enumeration e = entry.getFeature().elements(); while (e.hasMoreElements()) { SequenceFeature sf = (SequenceFeature) e.nextElement(); sf.setBegin(sf.getBegin() + absStart + 1); sf.setEnd(sf.getEnd() + absStart + 1); } } sbuffer.append(sequence.getName() + " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES" +" HAVE BEEN ADJUSTED ACCORDINGLY \n"); absStart = 0; } } int absEnd = absStart + nonGapped.toString().length(); absStart += 1; Enumeration e = entry.getDbReference().elements(); Vector onlyPdbEntries = new Vector(); while(e.hasMoreElements()) { PDBEntry pdb = (PDBEntry)e.nextElement(); if(!pdb.getType().equals("PDB")) continue; onlyPdbEntries.addElement(pdb); } sequence.setPDBId(onlyPdbEntries); sequence.setSequenceFeatures(entry.getFeature()); sequence.setStart(absStart); sequence.setEnd(absEnd); int n = 0; SequenceI seq2; while (n < align.getHeight()) { //This loop enables multiple sequences with the same //id to have features added and seq limits updated seq2 = align.getSequenceAt(n); if (seq2.getName().equals(idmatch)) { nonGapped = AlignSeq.extractGaps("-. ", seq2.getSequence()); absStart = sequence.getSequence().indexOf(nonGapped); absEnd = absStart + nonGapped.toString().length() - 1; // This is the Viewd alignment sequences // No need to tell the user of the dataset updates if ( (seq2.getStart() != absStart+sequence.getStart()) || (seq2.getEnd() != absEnd+sequence.getStart())) { sbuffer.append("Updated: " + seq2.getName() + " " + seq2.getStart() + "/" + seq2.getEnd() + " to " + (absStart + sequence.getStart()) + "/" + (absEnd + sequence.getStart()) + "\n"); seq2.setStart(absStart + sequence.getStart()); seq2.setEnd(absEnd + sequence.getStart()); } } n++; } } } }