/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.io; import jalview.datamodel.*; import jalview.gui.*; import java.io.*; import java.util.*; import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.*; import jalview.analysis.AlignSeq; /** * DOCUMENT ME! * * @author $author$ * @version $Revision$ */ public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentPanel ap; ArrayList unknownSequences; CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); Vector localCache = new Vector(); Vector getUniprotEntries(File file) { UniprotFile uni = new UniprotFile(); try { // 1. Load the mapping information from the file Mapping map = new Mapping(uni.getClass().getClassLoader()); java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml"); map.loadMapping(url); // 2. Unmarshal the data Unmarshaller unmar = new Unmarshaller(); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); localCache.addAll( uni.getUniprotEntries() ); // 3. marshal the data with the total price back and print the XML in the console // Marshaller marshaller = new Marshaller( // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE")) // ); // marshaller.setMapping(map); // marshaller.marshal(uni); } catch (Exception e) { System.out.println("Error getUniprotEntries() "+e); // e.printStackTrace(); // if(!updateLocalCache) // file.delete(); } return uni.getUniprotEntries(); } /** * Creates a new SequenceFeatureFetcher object. * * @param align DOCUMENT ME! * @param ap DOCUMENT ME! */ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { unknownSequences = new ArrayList(); this.align = align; this.ap = ap; Thread thread = new Thread(this); thread.start(); } /** * DOCUMENT ME! */ public void run() { try { int seqIndex = 0; Vector sequences = align.getSequences(); while (seqIndex < sequences.size()) { Vector ids = new Vector(); for (int i = 0; (seqIndex < sequences.size()) && (i < 50); seqIndex++, i++) { SequenceI sequence = (SequenceI) sequences.get(seqIndex); ids.add(sequence.getName()); unknownSequences.add(sequence.getName()); } /////////////////////////////////// ///READ FROM EBI if (ids.size() > 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) { remainingIds.append(ids.get(i) + ";"); } EBIFetchClient ebi = new EBIFetchClient(); File file = ebi.fetchDataAsFile(remainingIds.toString(), "xml", null); if (file != null) { ReadUniprotFile(file, align, ids); } } } } catch (Exception ex) { ex.printStackTrace(); } if (sbuffer.length() > 0) { output.setText( "Your sequences have been matched to Uniprot. Some of the ids have been\n" + "altered, most likely the start/end residue will have been updated.\n" + "Save your alignment to maintain the updated id.\n\n" + sbuffer.toString()); Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); } if (unknownSequences.size() > 0) { new WSWUBlastClient(ap, align, unknownSequences); } else ((Alignment)align).featuresAdded = true; ap.repaint(); } /** * DOCUMENT ME! * * @param result DOCUMENT ME! * @param out DOCUMENT ME! * @param align DOCUMENT ME! */ void ReadUniprotFile(File file, AlignmentI align, Vector ids) { if(!file.exists()) return; SequenceI sequence = null; // String pdb = null; Vector entries = getUniprotEntries(file); int i, iSize = entries==null?0:entries.size(); UniprotEntry entry; for (i = 0; i < iSize; i++) { entry = (UniprotEntry) entries.elementAt(i); String idmatch = entry.getAccession(); sequence = align.findName(idmatch); if (sequence == null) { //Sequence maybe Name, not Accession idmatch = entry.getName(); sequence = align.findName(idmatch); } if (sequence == null) { continue; } ids.remove(sequence.getName()); unknownSequences.remove(sequence.getName()); String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); int absStart = entry.getUniprotSequence().getContent().indexOf( nonGapped.toString()); if (absStart == -1) { unknownSequences.add(sequence.getName()); sbuffer.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n"); continue; } int absEnd = absStart + nonGapped.toString().length(); absStart += 1; if ( (absStart != sequence.getStart()) || (absEnd != sequence.getEnd())) { sbuffer.append("Updated: " + sequence.getName() + " " + sequence.getStart() + "/" + sequence.getEnd() + " to " + absStart + "/" + absEnd + "\n"); } sequence.setSequenceFeatures(entry.getFeatures()); sequence.setStart(absStart); sequence.setEnd(absEnd); } } }