X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=b3de0f3843346cd303d1cb4c7996881ecd021e1c;hb=32ce9ddb7ce1a68add53dd81785ae428ca136a83;hp=0a7817a9f5456a1c9ed874ef9adfe14850128cae;hpb=f24dacb1da56fccf05d684e2f4899facec2aecf7;p=jalview.git diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java index 0a7817a..b3de0f3 100755 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -26,6 +26,12 @@ import java.io.*; import java.util.*; +import org.exolab.castor.mapping.Mapping; + +import org.exolab.castor.xml.*; +import jalview.analysis.AlignSeq; + + /** * DOCUMENT ME! @@ -35,527 +41,280 @@ import java.util.*; */ public class SequenceFeatureFetcher implements Runnable { - AlignmentI align; - AlignmentPanel ap; - ArrayList unknownSequences; - CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - - /** - * Creates a new SequenceFeatureFetcher object. - * - * @param align DOCUMENT ME! - * @param ap DOCUMENT ME! - */ - public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) - { - unknownSequences = new ArrayList(); - this.align = align; - this.ap = ap; - - Thread thread = new Thread(this); - thread.start(); - } - /** - * DOCUMENT ME! - */ - public void run() - { - RandomAccessFile out = null; + AlignmentI align; + AlignmentI dataset; + AlignmentPanel ap; + ArrayList unknownSequences; + CutAndPasteTransfer output = new CutAndPasteTransfer(); + StringBuffer sbuffer = new StringBuffer(); - try - { - String cache = System.getProperty("user.home") + - "/.jalview.uniprot.xml"; + Vector getUniprotEntries(File file) + { - File test = new File(cache); + UniprotFile uni = new UniprotFile(); + try + { + // 1. Load the mapping information from the file + Mapping map = new Mapping(uni.getClass().getClassLoader()); + java.net.URL url = getClass().getResource("/uniprot_mapping.xml"); + map.loadMapping(url); - if (!test.exists()) - { - out = new RandomAccessFile(cache, "rw"); - out.writeBytes("\n"); - out.writeBytes("\n"); - } - else - { - out = new RandomAccessFile(cache, "rw"); + // 2. Unmarshal the data + Unmarshaller unmar = new Unmarshaller(); + unmar.setIgnoreExtraElements(true); + unmar.setMapping(map); + uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); - // open exisiting cache and remove from the end - long lastLine = 0; - String data; + } + catch (Exception e) + { + System.out.println("Error getUniprotEntries() "+e); + } + return uni.getUniprotEntries(); + } + + /** + * Creates a new SequenceFeatureFetcher object. + * + * @param align DOCUMENT ME! + * @param ap DOCUMENT ME! + */ + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) + { + unknownSequences = new ArrayList(); + this.align = align; + this.dataset = align.getDataset(); + this.ap = ap; + + Thread thread = new Thread(this); + thread.start(); + } + + /** + * DOCUMENT ME! + */ + public void run() + { + try + { + int seqIndex = 0; + Vector sequences = dataset.getSequences(); - while ((data = out.readLine()) != null) - { - if (data.indexOf("") > -1) - { - lastLine = out.getFilePointer(); - } - } + while (seqIndex < sequences.size()) + { + Vector ids = new Vector(); - out.seek(lastLine); - } + for (int i = 0; (seqIndex < sequences.size()) && (i < 50); + seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + if(!ids.contains(sequence.getName())) + { + ids.add(sequence.getName()); + unknownSequences.add(sequence); + } + } - int seqIndex = 0; - Vector sequences = align.getSequences(); + /////////////////////////////////// + ///READ FROM EBI + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + for (int i = 0; i < ids.size(); i++) + { + remainingIds.append(ids.get(i) + ";"); + } + EBIFetchClient ebi = new EBIFetchClient(); + File file = ebi.fetchDataAsFile(remainingIds.toString(), + "xml", null); - while (seqIndex < sequences.size()) - { - ArrayList ids = new ArrayList(); - for (int i = 0; (seqIndex < sequences.size()) && (i < 50); - seqIndex++, i++) - { - SequenceI sequence = (SequenceI) sequences.get(seqIndex); - ids.add(sequence.getName()); - } - tryLocalCacheFirst(ids, align); + if (file != null) + { + ReadUniprotFile(file, ids); + } + } + } + } + catch (Exception ex) + { + ex.printStackTrace(); + } - if (ids.size() > 0) - { - StringBuffer remainingIds = new StringBuffer("uniprot:"); + if (sbuffer.length() > 0) + { + output.setText( + "Your sequences have been matched to Uniprot. Some of the ids have been\n" + + "altered, most likely the start/end residue will have been updated.\n" + + "Save your alignment to maintain the updated id.\n\n" + + sbuffer.toString()); + Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); + // The above is the dataset, we must now find out the index + // of the viewed sequence - for (int i = 0; i < ids.size(); i++) - remainingIds.append(ids.get(i) + ";"); + } - EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), - "xml", null); + promptBeforeBlast(); - if (result != null) - { - ReadUniprotFile(result, out, align); - } - } - } + } - if (out != null) - { - out.writeBytes("\n"); - out.close(); - } - } - catch (Exception ex) - { - ex.printStackTrace(); - } - findMissingIds(align); + void promptBeforeBlast() + { + // This must be outside the run() body as java 1.5 + // will not return any value from the OptionPane to the expired thread. + if (unknownSequences.size() > 0) + { + int reply = javax.swing.JOptionPane.showConfirmDialog( + Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences." + +"\nPerform blast for unknown sequences?", + "Blast for Unidentified Sequences", + javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE); - if (sbuffer.length() > 0) - { - output.setText( - "Your sequences have been matched to Uniprot. Some of the ids have been\n" + - "altered, most likely the start/end residue will have been updated.\n" + - "Save your alignment to maintain the updated id.\n\n" + - sbuffer.toString()); - Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); - } + if(reply == javax.swing.JOptionPane.YES_OPTION) + new WSWUBlastClient(ap, align, unknownSequences); + } + else + ((Alignment)dataset).featuresAdded = true; - if (unknownSequences.size() > 0) - { - //ignore for now!!!!!!!!!! - // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); - } - jalview.gui.PaintRefresher.Refresh(null, align); - } + ap.repaint(); + } - /** - * DOCUMENT ME! - * - * @param result DOCUMENT ME! - * @param out DOCUMENT ME! - * @param align DOCUMENT ME! - */ - void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) - { - SequenceI sequence = null; - Vector features = null; - String type; - String description; - String status; - String start; - String end; - String pdb = null; - - for (int r = 0; r < result.length; r++) - { - if ((sequence == null) && (result[r].indexOf("") > -1)) - { - long filePointer = 0; - - if (out != null) - { - try - { - filePointer = out.getFilePointer(); - out.writeBytes("\n"); - } - catch (Exception ex) - { - } - } - - String seqName = parseElement(result[r], "", out); - sequence = align.findName(seqName); - - if (sequence == null) - { - sequence = align.findName(seqName.substring(0, - seqName.indexOf('_'))); - - if (sequence != null) - { - sbuffer.append("changing " + sequence.getName() + - " to " + seqName + "\n"); - sequence.setName(seqName); - } - } - - if (sequence == null) - { - sbuffer.append("UNIPROT updated suggestion is " + - result[r] + "\n"); - sequence = align.findName(result[r]); - - // this entry has been suggested by ebi. - // doesn't match id in alignment file - try - { - out.setLength(filePointer); - } - catch (Exception ex) - { - } - - // now skip to next entry - while (result[r].indexOf("") == -1) - r++; - } - - features = new Vector(); - type = ""; - start = "0"; - end = "0"; - description = ""; - status = ""; - pdb = ""; - } + /** + * DOCUMENT ME! + * + * @param result DOCUMENT ME! + * @param out DOCUMENT ME! + * @param align DOCUMENT ME! + */ + void ReadUniprotFile(File file, Vector ids) + { + if(!file.exists()) + return; - if (sequence == null) - { - continue; - } + SequenceI sequence = null; - if (result[r].indexOf(" -1) - { - pdb = parseValue(result[r], "value=", out); - sequence.setPDBId(pdb); - } + Vector entries = getUniprotEntries(file); - if (result[r].indexOf("feature type") > -1) - { - type = parseValue(result[r], "type=", out); - description = parseValue(result[r], "description=", null); - status = parseValue(result[r], "status=", null); - - while (result[r].indexOf("position") == -1) - { - r++; // - } - - // r++; - if (result[r].indexOf("begin") > -1) - { - start = parseValue(result[r], "position=", out); - end = parseValue(result[++r], "position=", out); - } - else - { - start = parseValue(result[r], "position=", out); - end = parseValue(result[r], "position=", null); - } - - int sstart = Integer.parseInt(start); - int eend = Integer.parseInt(end); - - if (out != null) - { - try - { - out.writeBytes("\n"); - } - catch (Exception ex) - { - } - } - - SequenceFeature sf = new SequenceFeature(type, sstart, eend, - description, status); - features.add(sf); - } - - if (result[r].indexOf(" -1) - { - StringBuffer seqString = new StringBuffer(); - - if (out != null) - { - try - { - out.writeBytes(result[r] + "\n"); - } - catch (Exception ex) - { - } - } - - while (result[++r].indexOf("") == -1) - { - seqString.append(result[r]); - - if (out != null) - { - try - { - out.writeBytes(result[r] + "\n"); - } - catch (Exception ex) - { - } - } - } - - if (out != null) - { - try - { - out.writeBytes(result[r] + "\n"); - } - catch (Exception ex) - { - } - } - - StringBuffer nonGapped = new StringBuffer(); - - for (int i = 0; i < sequence.getSequence().length(); i++) - { - if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) - { - nonGapped.append(sequence.getCharAt(i)); - } - } - - int absStart = seqString.toString().indexOf(nonGapped.toString()); - - if (absStart == -1) - { - unknownSequences.add(sequence.getName()); - features = null; - sbuffer.append(sequence.getName() + - " SEQUENCE NOT %100 MATCH \n"); - - continue; - } - - int absEnd = absStart + nonGapped.toString().length(); - absStart += 1; - - if ((absStart != sequence.getStart()) || - (absEnd != sequence.getEnd())) - { - sbuffer.append("Updated: " + sequence.getName() + " " + - sequence.getStart() + "/" + sequence.getEnd() + - " to " + absStart + "/" + absEnd + "\n"); - } - - sequence.setStart(absStart); - sequence.setEnd(absEnd); - } - - if (result[r].indexOf("") > -1) - { - if (features != null) - { - sequence.setSequenceFeatures(features); - } - - features = null; - sequence = null; - - if (out != null) - { - try - { - out.writeBytes("\n"); - } - catch (Exception ex) - { - } - } - } - } - } - - /** - * DOCUMENT ME! - * - * @param align DOCUMENT ME! - */ - void findMissingIds(AlignmentI align) + int i, iSize = entries==null?0:entries.size(); + UniprotEntry entry; + for (i = 0; i < iSize; i++) { - String data; - ArrayList cachedIds = new ArrayList(); - - try + entry = (UniprotEntry) entries.elementAt(i); + String idmatch = entry.getAccession().elementAt(0).toString(); + sequence = dataset.findName(idmatch); + + if (sequence == null) + { + //Sequence maybe Name, not Accession + idmatch = entry.getName().elementAt(0).toString(); + sequence = dataset.findName(idmatch); + } + + if (sequence == null) + { + System.out.println(idmatch+" not found"); + continue; + } + + ids.remove(sequence.getName()); + unknownSequences.remove(sequence); + + String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); + + int absStart = entry.getUniprotSequence().getContent().indexOf( + nonGapped.toString()); + + if (absStart == -1) + { + // Is UniprotSequence contained in dataset sequence? + absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent()); + if(absStart == -1) { - if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null) - return; - - BufferedReader in = new BufferedReader(new FileReader( - jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + unknownSequences.add(sequence.getName()); + sbuffer.append(sequence.getName() + + " SEQUENCE NOT %100 MATCH \n"); - while ((data = in.readLine()) != null) - { - if (data.indexOf("name") > -1) - { - String name = parseElement(data, "", null); - cachedIds.add(name); - } - } + continue; } - catch (Exception ex) + else { - ex.printStackTrace(); - } - - for (int i = 0; i < align.getHeight(); i++) - if (!cachedIds.contains(align.getSequenceAt(i).getName())) + if(entry.getFeature()!=null) + { + Enumeration e = entry.getFeature().elements(); + while (e.hasMoreElements()) { - unknownSequences.add(align.getSequenceAt(i).getName()); + SequenceFeature sf = (SequenceFeature) e.nextElement(); + sf.setBegin(sf.getBegin() + absStart + 1); + sf.setEnd(sf.getEnd() + absStart + 1); } - } + } - /** - * DOCUMENT ME! - * - * @param ids DOCUMENT ME! - * @param align DOCUMENT ME! - */ - void tryLocalCacheFirst(ArrayList ids, AlignmentI align) - { - ArrayList cacheData = new ArrayList(); + sbuffer.append(sequence.getName() + + " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES" + +" HAVE BEEN ADJUSTED ACCORDINGLY \n"); + absStart = 0; + } - try - { - if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null) - return; + } - BufferedReader in = new BufferedReader(new FileReader( - jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + int absEnd = absStart + nonGapped.toString().length(); + absStart += 1; - // read through cache file, if the cache has sequences we're looking for - // add the lines to a new String array, Readthis new array and - // make sure we remove the ids from the list to retrieve from EBI - String data; + Enumeration e = entry.getDbReference().elements(); + Vector onlyPdbEntries = new Vector(); + while(e.hasMoreElements()) + { + PDBEntry pdb = (PDBEntry)e.nextElement(); + if(!pdb.getType().equals("PDB")) + continue; - while ((data = in.readLine()) != null) - { - if (data.indexOf("name") > -1) - { - String name = parseElement(data, "", null); - - if (ids.contains(name)) - { - cacheData.add(""); - cacheData.add(data); - - while (data.indexOf("") == -1) - { - data = in.readLine(); - cacheData.add(data); - } - - cacheData.add(data); - - ids.remove(name); - } - } - } - } - catch (Exception ex) - { - ex.printStackTrace(); - } + onlyPdbEntries.addElement(pdb); + } - String[] localData = new String[cacheData.size()]; - cacheData.toArray(localData); + sequence.setPDBId(onlyPdbEntries); + sequence.setSequenceFeatures(entry.getFeature()); + sequence.setStart(absStart); + sequence.setEnd(absEnd); - if ((localData != null) && (localData.length > 0)) - { - ReadUniprotFile(localData, null, align); - } - } - /** - * DOCUMENT ME! - * - * @param line DOCUMENT ME! - * @param tag DOCUMENT ME! - * @param out DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - String parseValue(String line, String tag, RandomAccessFile out) - { - if (out != null) + int n = 0; + SequenceI seq2; + while (n < align.getHeight()) + { + //This loop enables multiple sequences with the same + //id to have features added and seq limits updated + seq2 = align.getSequenceAt(n); + if (seq2.getName().equals(idmatch)) { - try - { - out.writeBytes(line + "\n"); - } - catch (Exception ex) - { - } - } - int index = line.indexOf(tag) + tag.length() + 1; + nonGapped = AlignSeq.extractGaps("-. ", seq2.getSequence()); - if (index == tag.length()) - { - return ""; - } + absStart = sequence.getSequence().indexOf(nonGapped); + absEnd = absStart + nonGapped.toString().length() - 1; - return line.substring(index, line.indexOf("\"", index + 1)); - } + // This is the Viewd alignment sequences + // No need to tell the user of the dataset updates + if ( (seq2.getStart() != absStart+sequence.getStart()) + || (seq2.getEnd() != absEnd+sequence.getStart())) + { + sbuffer.append("Updated: " + seq2.getName() + " " + + seq2.getStart() + "/" + seq2.getEnd() + + " to " + (absStart + sequence.getStart()) + "/" + + (absEnd + sequence.getStart()) + "\n"); - /** - * DOCUMENT ME! - * - * @param line DOCUMENT ME! - * @param tag DOCUMENT ME! - * @param out DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - String parseElement(String line, String tag, RandomAccessFile out) - { - if (out != null) - { - try - { - out.writeBytes(line + "\n"); - } - catch (Exception ex) - { - } + seq2.setStart(absStart + sequence.getStart()); + seq2.setEnd(absEnd + sequence.getStart()); + } } - int index = line.indexOf(tag) + tag.length(); - - return line.substring(index, line.indexOf("