X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=520ba03826f0ddc6f5c11c07591372b94097a8e8;hb=b9926a366ae75eb28250166fba939d2f9bcf185f;hp=c8c39dc893c0c0009ef75bad39806d647e206170;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java index c8c39dc..520ba03 100755 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -22,398 +22,226 @@ import jalview.datamodel.*; import jalview.gui.*; -import jalview.io.*; - import java.io.*; import java.util.*; -import javax.swing.*; - - -public class SequenceFeatureFetcher implements Runnable { - AlignmentI align; - AlignmentPanel ap; - ArrayList unknownSequences; - CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - - public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { - unknownSequences = new ArrayList(); - this.align = align; - this.ap = ap; - - Thread thread = new Thread(this); - thread.start(); - } - - public void run() { - String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - - RandomAccessFile out = null; - - try { - if (cache == null) { - jalview.bin.Cache.setProperty("UNIPROT_CACHE", - System.getProperty("user.home") + "/uniprot.xml"); - cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - } - - File test = new File(cache); - - if (!test.exists()) { - out = new RandomAccessFile(cache, "rw"); - out.writeBytes("\n"); - out.writeBytes("\n"); - } else { - out = new RandomAccessFile(cache, "rw"); +import org.exolab.castor.mapping.Mapping; - // open exisiting cache and remove from the end - long lastLine = 0; - String data; +import org.exolab.castor.xml.*; +import jalview.analysis.AlignSeq; - while ((data = out.readLine()) != null) { - if (data.indexOf("") > -1) { - lastLine = out.getFilePointer(); - } - } - out.seek(lastLine); - } - int seqIndex = 0; - Vector sequences = align.getSequences(); +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class SequenceFeatureFetcher implements Runnable +{ - while (seqIndex < sequences.size()) { - ArrayList ids = new ArrayList(); + AlignmentI align; + AlignmentPanel ap; + ArrayList unknownSequences; + CutAndPasteTransfer output = new CutAndPasteTransfer(); + StringBuffer sbuffer = new StringBuffer(); - for (int i = 0; (seqIndex < sequences.size()) && (i < 50); - seqIndex++, i++) { - SequenceI sequence = (SequenceI) sequences.get(seqIndex); - ids.add(sequence.getName()); - } + Vector localCache = new Vector(); - tryLocalCacheFirst(ids, align); + Vector getUniprotEntries(File file) + { - if (ids.size() > 0) { - StringBuffer remainingIds = new StringBuffer("uniprot:"); + UniprotFile uni = new UniprotFile(); + try + { + // 1. Load the mapping information from the file + Mapping map = new Mapping(uni.getClass().getClassLoader()); + java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml"); + map.loadMapping(url); - for (int i = 0; i < ids.size(); i++) - remainingIds.append(ids.get(i) + ";"); + // 2. Unmarshal the data + Unmarshaller unmar = new Unmarshaller(); + unmar.setIgnoreExtraElements(true); + unmar.setMapping(map); + uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); + localCache.addAll( uni.getUniprotEntries() ); - EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), - "xml", null); + // 3. marshal the data with the total price back and print the XML in the console + // Marshaller marshaller = new Marshaller( + // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE")) + // ); + // marshaller.setMapping(map); + // marshaller.marshal(uni); - if (result != null) { - ReadUniprotFile(result, out, align); - } - } - } - - if (out != null) { - out.writeBytes("\n"); - out.close(); - } - } catch (Exception ex) { - ex.printStackTrace(); - } - - ap.repaint(); - findMissingIds(align); + } + catch (Exception e) + { + System.out.println("Error getUniprotEntries() "+e); + // e.printStackTrace(); + // if(!updateLocalCache) + // file.delete(); - if (sbuffer.length() > 0) { - output.setText( - "Your sequences have been matched to Uniprot. Some of the ids have been\n" + - "altered, most likely the start/end residue will have been updated.\n" + - "Save your alignment to maintain the updated id.\n\n" + - sbuffer.toString()); - Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); + } + return uni.getUniprotEntries(); + } + + /** + * Creates a new SequenceFeatureFetcher object. + * + * @param align DOCUMENT ME! + * @param ap DOCUMENT ME! + */ + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) + { + unknownSequences = new ArrayList(); + this.align = align; + this.ap = ap; + + Thread thread = new Thread(this); + thread.start(); + } + + /** + * DOCUMENT ME! + */ + public void run() + { + try + { + int seqIndex = 0; + Vector sequences = align.getSequences(); + + while (seqIndex < sequences.size()) + { + Vector ids = new Vector(); + + for (int i = 0; (seqIndex < sequences.size()) && (i < 50); + seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + unknownSequences.add(sequence.getName()); } - if (unknownSequences.size() > 0) { - //ignore for now!!!!!!!!!! - // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); + /////////////////////////////////// + ///READ FROM EBI + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + for (int i = 0; i < ids.size(); i++) + { + remainingIds.append(ids.get(i) + ";"); + } + EBIFetchClient ebi = new EBIFetchClient(); + File file = ebi.fetchDataAsFile(remainingIds.toString(), + "xml", null); + + + if (file != null) + { + ReadUniprotFile(file, align, ids); + } } + } } - - void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) { - SequenceI sequence = null; - Vector features = null; - String type; - String description; - String status; - String start; - String end; - String pdb = null; - - for (int r = 0; r < result.length; r++) { - if ((sequence == null) && (result[r].indexOf("") > -1)) { - long filePointer = 0; - - if (out != null) { - try { - filePointer = out.getFilePointer(); - out.writeBytes("\n"); - } catch (Exception ex) { - } - } - - String seqName = parseElement(result[r], "", out); - sequence = align.findName(seqName); - - if (sequence == null) { - sequence = align.findName(seqName.substring(0, - seqName.indexOf('_'))); - - if (sequence != null) { - sbuffer.append("changing " + sequence.getName() + - " to " + seqName + "\n"); - sequence.setName(seqName); - } - } - - if (sequence == null) { - sbuffer.append("UNIPROT updated suggestion is " + - result[r] + "\n"); - sequence = align.findName(result[r]); - - // this entry has been suggested by ebi. - // doesn't match id in alignment file - try { - out.setLength(filePointer); - } catch (Exception ex) { - } - - // now skip to next entry - while (result[r].indexOf("") == -1) - r++; - } - - features = new Vector(); - type = ""; - start = "0"; - end = "0"; - description = ""; - status = ""; - pdb = ""; - } - - if (sequence == null) { - continue; - } - - if (result[r].indexOf(" -1) { - pdb = parseValue(result[r], "value=", out); - sequence.setPDBId(pdb); - } - - if (result[r].indexOf("feature type") > -1) { - type = parseValue(result[r], "type=", out); - description = parseValue(result[r], "description=", null); - status = parseValue(result[r], "status=", null); - - while (result[r].indexOf("position") == -1) { - r++; // - } - - // r++; - if (result[r].indexOf("begin") > -1) { - start = parseValue(result[r], "position=", out); - end = parseValue(result[++r], "position=", out); - } else { - start = parseValue(result[r], "position=", out); - end = parseValue(result[r], "position=", null); - } - - int sstart = Integer.parseInt(start); - int eend = Integer.parseInt(end); - - if (out != null) { - try { - out.writeBytes("\n"); - } catch (Exception ex) { - } - } - - SequenceFeature sf = new SequenceFeature(type, sstart, eend, - description, status); - features.add(sf); - } - - if (result[r].indexOf(" -1) { - StringBuffer seqString = new StringBuffer(); - - if (out != null) { - try { - out.writeBytes(result[r] + "\n"); - } catch (Exception ex) { - } - } - - while (result[++r].indexOf("") == -1) { - seqString.append(result[r]); - - if (out != null) { - try { - out.writeBytes(result[r] + "\n"); - } catch (Exception ex) { - } - } - } - - if (out != null) { - try { - out.writeBytes(result[r] + "\n"); - } catch (Exception ex) { - } - } - - StringBuffer nonGapped = new StringBuffer(); - - for (int i = 0; i < sequence.getSequence().length(); i++) { - if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) { - nonGapped.append(sequence.getCharAt(i)); - } - } - - int absStart = seqString.toString().indexOf(nonGapped.toString()); - - if (absStart == -1) { - unknownSequences.add(sequence.getName()); - features = null; - sbuffer.append(sequence.getName() + - " SEQUENCE NOT %100 MATCH \n"); - - continue; - } - - int absEnd = absStart + nonGapped.toString().length(); - absStart += 1; - - if ((absStart != sequence.getStart()) || - (absEnd != sequence.getEnd())) { - sbuffer.append("Updated: " + sequence.getName() + " " + - sequence.getStart() + "/" + sequence.getEnd() + - " to " + absStart + "/" + absEnd + "\n"); - } - - sequence.setStart(absStart); - sequence.setEnd(absEnd); - } - - if (result[r].indexOf("") > -1) { - if (features != null) { - sequence.setSequenceFeatures(features); - } - - features = null; - sequence = null; - - if (out != null) { - try { - out.writeBytes("\n"); - } catch (Exception ex) { - } - } - } - } + catch (Exception ex) + { + ex.printStackTrace(); } - void findMissingIds(AlignmentI align) { - String data; - ArrayList cachedIds = new ArrayList(); - - try { - BufferedReader in = new BufferedReader(new FileReader( - jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); - - while ((data = in.readLine()) != null) { - if (data.indexOf("name") > -1) { - String name = parseElement(data, "", null); - cachedIds.add(name); - } - } - } catch (Exception ex) { - ex.printStackTrace(); - } - - for (int i = 0; i < align.getHeight(); i++) - if (!cachedIds.contains(align.getSequenceAt(i).getName())) { - unknownSequences.add(align.getSequenceAt(i).getName()); - } + if (sbuffer.length() > 0) + { + output.setText( + "Your sequences have been matched to Uniprot. Some of the ids have been\n" + + "altered, most likely the start/end residue will have been updated.\n" + + "Save your alignment to maintain the updated id.\n\n" + + sbuffer.toString()); + Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); } - void tryLocalCacheFirst(ArrayList ids, AlignmentI align) { - ArrayList cacheData = new ArrayList(); - - try { - BufferedReader in = new BufferedReader(new FileReader( - jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); - - // read through cache file, if the cache has sequences we're looking for - // add the lines to a new String array, Readthis new array and - // make sure we remove the ids from the list to retrieve from EBI - String data; - - while ((data = in.readLine()) != null) { - if (data.indexOf("name") > -1) { - String name = parseElement(data, "", null); - - if (ids.contains(name)) { - cacheData.add(""); - cacheData.add(data); - - while (data.indexOf("") == -1) { - data = in.readLine(); - cacheData.add(data); - } - - cacheData.add(data); - - ids.remove(name); - } - } - } - } catch (Exception ex) { - ex.printStackTrace(); - } - - String[] localData = new String[cacheData.size()]; - cacheData.toArray(localData); - - if ((localData != null) && (localData.length > 0)) { - ReadUniprotFile(localData, null, align); - } + if (unknownSequences.size() > 0) + { + new WSWUBlastClient(ap, align, unknownSequences); } - - String parseValue(String line, String tag, RandomAccessFile out) { - if (out != null) { - try { - out.writeBytes(line + "\n"); - } catch (Exception ex) { - } - } - - int index = line.indexOf(tag) + tag.length() + 1; - - if (index == tag.length()) { - return ""; - } - - return line.substring(index, line.indexOf("\"", index + 1)); + else + ((Alignment)align).featuresAdded = true; + + + ap.repaint(); + } + + /** + * DOCUMENT ME! + * + * @param result DOCUMENT ME! + * @param out DOCUMENT ME! + * @param align DOCUMENT ME! + */ + void ReadUniprotFile(File file, AlignmentI align, Vector ids) + { + if(!file.exists()) + return; + + SequenceI sequence = null; + // String pdb = null; + + Vector entries = getUniprotEntries(file); + + int i, iSize = entries==null?0:entries.size(); + UniprotEntry entry; + for (i = 0; i < iSize; i++) + { + entry = (UniprotEntry) entries.elementAt(i); + String idmatch = entry.getAccession(); + sequence = align.findName(idmatch); + + if (sequence == null) + { + //Sequence maybe Name, not Accession + idmatch = entry.getName(); + sequence = align.findName(idmatch); + } + + if (sequence == null) + { + continue; + } + + ids.remove(sequence.getName()); + unknownSequences.remove(sequence.getName()); + + String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); + + int absStart = entry.getUniprotSequence().getContent().indexOf( + nonGapped.toString()); + + if (absStart == -1) + { + unknownSequences.add(sequence.getName()); + sbuffer.append(sequence.getName() + + " SEQUENCE NOT %100 MATCH \n"); + + continue; + } + + int absEnd = absStart + nonGapped.toString().length(); + absStart += 1; + + if ( (absStart != sequence.getStart()) || + (absEnd != sequence.getEnd())) + { + sbuffer.append("Updated: " + sequence.getName() + " " + + sequence.getStart() + "/" + sequence.getEnd() + + " to " + absStart + "/" + absEnd + "\n"); + } + + sequence.setSequenceFeatures(entry.getFeatures()); + sequence.setStart(absStart); + sequence.setEnd(absEnd); } + } +} - String parseElement(String line, String tag, RandomAccessFile out) { - if (out != null) { - try { - out.writeBytes(line + "\n"); - } catch (Exception ex) { - } - } - - int index = line.indexOf(tag) + tag.length(); - return line.substring(index, line.indexOf("