/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.io; import jalview.datamodel.*; import jalview.gui.*; import jalview.io.*; import java.io.*; import java.util.*; /** * DOCUMENT ME! * * @author $author$ * @version $Revision$ */ public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentPanel ap; ArrayList unknownSequences; CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); /** * Creates a new SequenceFeatureFetcher object. * * @param align DOCUMENT ME! * @param ap DOCUMENT ME! */ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { unknownSequences = new ArrayList(); this.align = align; this.ap = ap; Thread thread = new Thread(this); thread.start(); } /** * DOCUMENT ME! 
*/ public void run() { RandomAccessFile out = null; try { String cache = System.getProperty("user.home") + "/.jalview.uniprot.xml"; File test = new File(cache); if (!test.exists()) { out = new RandomAccessFile(cache, "rw"); out.writeBytes("\n"); out.writeBytes("\n"); } else { out = new RandomAccessFile(cache, "rw"); // open exisiting cache and remove from the end long lastLine = 0; String data; while ((data = out.readLine()) != null) { if (data.indexOf("") > -1) { lastLine = out.getFilePointer(); } } out.seek(lastLine); } int seqIndex = 0; Vector sequences = align.getSequences(); while (seqIndex < sequences.size()) { ArrayList ids = new ArrayList(); for (int i = 0; (seqIndex < sequences.size()) && (i < 50); seqIndex++, i++) { SequenceI sequence = (SequenceI) sequences.get(seqIndex); ids.add(sequence.getName()); } tryLocalCacheFirst(ids, align); if (ids.size() > 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) remainingIds.append(ids.get(i) + ";"); EBIFetchClient ebi = new EBIFetchClient(); String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); if (result != null) { ReadUniprotFile(result, out, align); } } } if (out != null) { out.writeBytes("\n"); out.close(); } } catch (Exception ex) { ex.printStackTrace(); } findMissingIds(align); if (sbuffer.length() > 0) { output.setText( "Your sequences have been matched to Uniprot. Some of the ids have been\n" + "altered, most likely the start/end residue will have been updated.\n" + "Save your alignment to maintain the updated id.\n\n" + sbuffer.toString()); Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); } if (unknownSequences.size() > 0) { //ignore for now!!!!!!!!!! // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); } jalview.gui.PaintRefresher.Refresh(null, align); } /** * DOCUMENT ME! * * @param result DOCUMENT ME! * @param out DOCUMENT ME! * @param align DOCUMENT ME! 
*/
    /**
     * Walks the lines of a fetched or cached record set and applies what it
     * finds to the matching sequences of the alignment.
     *
     * <p>While no sequence is current, a line matching the start marker makes
     * the method extract a name with <code>parseElement</code> and look it up
     * with <code>align.findName</code>, retrying with the part of the name
     * before the first '_' (and renaming the sequence on success). For the
     * current sequence it then sets the PDB id, collects
     * <code>SequenceFeature</code> objects from "feature type" lines, and
     * compares the record's residue text with the ungapped alignment sequence
     * to update start/end. A line matching the end marker attaches the
     * collected features and clears the current sequence. Human-readable notes
     * are appended to <code>sbuffer</code>; names whose residues do not match
     * are added to <code>unknownSequences</code>.
     *
     * <p>NOTE(review): many marker literals in this method are empty
     * (<code>""</code>) and two are unterminated in this copy of the file; it
     * looks as if markup-like text was stripped from them. The code is left
     * exactly as found - restore the literals from the upstream source.
     *
     * @param result lines to scan
     * @param out cache file that scanned lines are echoed to, or
     *            <code>null</code> to skip all cache writes
     * @param align alignment whose sequences are looked up by name
     */
    void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) {
        SequenceI sequence = null;
        Vector features = null;
        String type;
        String description;
        String status;
        String start;
        String end;
        String pdb = null;

        // r is also advanced inside the loop body in several places.
        for (int r = 0; r < result.length; r++) {
            if ((sequence == null) && (result[r].indexOf("") > -1)) {
                // Remember where this entry starts in the cache so that it can
                // be cut off again if the entry turns out not to match.
                long filePointer = 0;

                if (out != null) {
                    try {
                        filePointer = out.getFilePointer();
                        out.writeBytes("\n");
                    } catch (Exception ex) { }
                }

                String seqName = parseElement(result[r], "", out);
                sequence = align.findName(seqName);

                if (sequence == null) {
                    // Retry with the text before the first '_'.
                    // NOTE(review): substring throws if seqName has no '_'.
                    sequence = align.findName(seqName.substring(0, seqName.indexOf('_')));

                    if (sequence != null) {
                        sbuffer.append("changing " + sequence.getName() + " to " + seqName + "\n");
                        sequence.setName(seqName);
                    }
                }

                if (sequence == null) {
                    sbuffer.append("UNIPROT updated suggestion is " + result[r] + "\n");
                    sequence = align.findName(result[r]);

                    // this entry has been suggested by ebi.
                    // doesn't match id in alignment file
                    // When out is null this call throws NullPointerException,
                    // which the empty catch swallows.
                    try {
                        out.setLength(filePointer);
                    } catch (Exception ex) { }

                    // now skip to next entry
                    // NOTE(review): no bound check on r while skipping.
                    while (result[r].indexOf("") == -1) r++;
                }

                // Reset the per-entry state.
                features = new Vector();
                type = "";
                start = "0";
                end = "0";
                description = "";
                status = "";
                pdb = "";
            }

            if (sequence == null) {
                continue;
            }

            // NOTE(review): the literal on the next line is truncated (no closing
            // quote, no comparison operator); kept verbatim.
            if (result[r].indexOf(" -1) {
                pdb = parseValue(result[r], "value=", out);
                sequence.setPDBId(pdb);
            }

            if (result[r].indexOf("feature type") > -1) {
                // Only the first parseValue call echoes the line to the cache.
                type = parseValue(result[r], "type=", out);
                description = parseValue(result[r], "description=", null);
                status = parseValue(result[r], "status=", null);

                // Advance to the next line that mentions "position".
                while (result[r].indexOf("position") == -1) {
                    r++; //
                }
                // r++;

                if (result[r].indexOf("begin") > -1) {
                    // Start and end are on two consecutive lines.
                    start = parseValue(result[r], "position=", out);
                    end = parseValue(result[++r], "position=", out);
                } else {
                    // Single position: start and end are the same value.
                    start = parseValue(result[r], "position=", out);
                    end = parseValue(result[r], "position=", null);
                }

                int sstart = Integer.parseInt(start);
                int eend = Integer.parseInt(end);

                if (out != null) {
                    try {
                        out.writeBytes("\n");
                    } catch (Exception ex) { }
                }

                SequenceFeature sf = new SequenceFeature(type, sstart, eend, description, status);
                features.add(sf);
            }

            // NOTE(review): the literal on the next line is truncated (no closing
            // quote, no comparison operator); kept verbatim.
            if (result[r].indexOf(" -1) {
                StringBuffer seqString = new StringBuffer();

                if (out != null) {
                    try {
                        out.writeBytes(result[r] + "\n");
                    } catch (Exception ex) { }
                }

                // Concatenate the following lines until the closing marker.
                while (result[++r].indexOf("") == -1) {
                    seqString.append(result[r]);

                    if (out != null) {
                        try {
                            out.writeBytes(result[r] + "\n");
                        } catch (Exception ex) { }
                    }
                }

                if (out != null) {
                    try {
                        out.writeBytes(result[r] + "\n");
                    } catch (Exception ex) { }
                }

                // Strip gap characters from the alignment's copy of the sequence.
                StringBuffer nonGapped = new StringBuffer();

                for (int i = 0; i < sequence.getSequence().length(); i++) {
                    if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) {
                        nonGapped.append(sequence.getCharAt(i));
                    }
                }

                // Locate the ungapped alignment sequence inside the record's text.
                int absStart = seqString.toString().indexOf(nonGapped.toString());

                if (absStart == -1) {
                    unknownSequences.add(sequence.getName());
                    features = null;
                    sbuffer.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n");

                    continue;
                }

                // Convert the 0-based match offset into a 1-based start and an
                // inclusive end.
                int absEnd = absStart + nonGapped.toString().length();
                absStart += 1;

                if ((absStart != sequence.getStart()) || (absEnd != sequence.getEnd())) {
                    sbuffer.append("Updated: " + sequence.getName() + " " +
                        sequence.getStart() + "/" + sequence.getEnd() + " to " +
                        absStart + "/" + absEnd + "\n");
                }

                sequence.setStart(absStart);
                sequence.setEnd(absEnd);
            }

            // End of entry: hand over the collected features and reset.
            if (result[r].indexOf("") > -1) {
                if (features != null) {
                    sequence.setSequenceFeatures(features);
                }

                features = null;
                sequence = null;

                if (out != null) {
                    try {
                        out.writeBytes("\n");
                    } catch (Exception ex) { }
                }
            }
        }
    }

    /**
     * DOCUMENT ME!
     *
     * @param align DOCUMENT ME!
*/ void findMissingIds(AlignmentI align) { String data; ArrayList cachedIds = new ArrayList(); try { BufferedReader in = new BufferedReader(new FileReader( jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); while ((data = in.readLine()) != null) { if (data.indexOf("name") > -1) { String name = parseElement(data, "", null); cachedIds.add(name); } } } catch (Exception ex) { ex.printStackTrace(); } for (int i = 0; i < align.getHeight(); i++) if (!cachedIds.contains(align.getSequenceAt(i).getName())) { unknownSequences.add(align.getSequenceAt(i).getName()); } } /** * DOCUMENT ME! * * @param ids DOCUMENT ME! * @param align DOCUMENT ME! */ void tryLocalCacheFirst(ArrayList ids, AlignmentI align) { ArrayList cacheData = new ArrayList(); try { BufferedReader in = new BufferedReader(new FileReader( jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); // read through cache file, if the cache has sequences we're looking for // add the lines to a new String array, Readthis new array and // make sure we remove the ids from the list to retrieve from EBI String data; while ((data = in.readLine()) != null) { if (data.indexOf("name") > -1) { String name = parseElement(data, "", null); if (ids.contains(name)) { cacheData.add(""); cacheData.add(data); while (data.indexOf("") == -1) { data = in.readLine(); cacheData.add(data); } cacheData.add(data); ids.remove(name); } } } } catch (Exception ex) { ex.printStackTrace(); } String[] localData = new String[cacheData.size()]; cacheData.toArray(localData); if ((localData != null) && (localData.length > 0)) { ReadUniprotFile(localData, null, align); } } /** * DOCUMENT ME! * * @param line DOCUMENT ME! * @param tag DOCUMENT ME! * @param out DOCUMENT ME! * * @return DOCUMENT ME! 
*/
    /**
     * Optionally echoes <code>line</code> (plus a newline) to <code>out</code>,
     * then returns the double-quoted value that follows <code>tag</code> in
     * <code>line</code>. The character directly after the tag is skipped as the
     * opening quote, so callers pass tags that end in '=' (for example
     * <code>"type="</code>).
     *
     * <p>An empty string is returned when the tag is absent, when nothing
     * follows the tag, or when no closing quote is found. An empty value
     * (<code>tag=""</code>) yields an empty string.
     *
     * @param line text to search
     * @param tag attribute name including the trailing '='
     * @param out file the line is copied to first, or <code>null</code> to skip
     *            the copy; a failed write is ignored
     *
     * @return the text between the quotes, or "" as described above
     */
    String parseValue(String line, String tag, RandomAccessFile out) {
        if (out != null) {
            try {
                out.writeBytes(line + "\n");
            } catch (IOException ex) {
                // Echoing to the cache is best-effort; parsing continues.
            }
        }

        int tagAt = line.indexOf(tag);

        if (tagAt == -1) {
            return "";
        }

        // Skip the tag itself and the opening quote.
        int index = tagAt + tag.length() + 1;

        if (index > line.length()) {
            return "";
        }

        // Search from index (not index + 1) so that an empty value finds its
        // own closing quote.
        int close = line.indexOf("\"", index);

        if (close == -1) {
            return "";
        }

        return line.substring(index, close);
    }

    /** * DOCUMENT ME! * * @param line DOCUMENT ME! * @param tag DOCUMENT ME! * @param out DOCUMENT ME! * * @return DOCUMENT ME! */ String parseElement(String line, String tag, RandomAccessFile out) { if (out != null) { try { out.writeBytes(line + "\n"); } catch (Exception ex) { } } int index = line.indexOf(tag) + tag.length(); return line.substring(index, line.indexOf("