X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=520ba03826f0ddc6f5c11c07591372b94097a8e8;hb=b9926a366ae75eb28250166fba939d2f9bcf185f;hp=7770cedc82e818b5672f75321fac6f5c0b1530eb;hpb=9270f5adbff6622400c8fc933ebdb652cbb760f5;p=jalview.git diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java index 7770ced..520ba03 100755 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -1,371 +1,247 @@ +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ package jalview.io; +import jalview.datamodel.*; + +import jalview.gui.*; + import java.io.*; + import java.util.*; -import javax.swing.*; -import jalview.io.*; -import jalview.gui.*; -import jalview.datamodel.*; +import org.exolab.castor.mapping.Mapping; + +import org.exolab.castor.xml.*; +import jalview.analysis.AlignSeq; + + + +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ public class SequenceFeatureFetcher implements Runnable { + AlignmentI align; AlignmentPanel ap; ArrayList unknownSequences; - JInternalFrame outputFrame = new JInternalFrame(); - CutAndPasteTransfer output = new CutAndPasteTransfer(false); + CutAndPasteTransfer output = new CutAndPasteTransfer(); StringBuffer sbuffer = new StringBuffer(); + Vector localCache = new Vector(); + + Vector getUniprotEntries(File file) + { + + UniprotFile uni = new UniprotFile(); + try + { + // 1. Load the mapping information from the file + Mapping map = new Mapping(uni.getClass().getClassLoader()); + java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml"); + map.loadMapping(url); + + // 2. Unmarshal the data + Unmarshaller unmar = new Unmarshaller(); + unmar.setIgnoreExtraElements(true); + unmar.setMapping(map); + uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); + localCache.addAll( uni.getUniprotEntries() ); + + // 3. marshal the data with the total price back and print the XML in the console + // Marshaller marshaller = new Marshaller( + // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE")) + // ); + // marshaller.setMapping(map); + // marshaller.marshal(uni); + + } + catch (Exception e) + { + System.out.println("Error getUniprotEntries() "+e); + // e.printStackTrace(); + // if(!updateLocalCache) + // file.delete(); + + } + return uni.getUniprotEntries(); + } + + /** + * Creates a new SequenceFeatureFetcher object. + * + * @param align DOCUMENT ME! + * @param ap DOCUMENT ME! + */ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { unknownSequences = new ArrayList(); this.align = align; this.ap = ap; + Thread thread = new Thread(this); thread.start(); } + /** + * DOCUMENT ME! + */ public void run() -{ - - String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); + { + try + { + int seqIndex = 0; + Vector sequences = align.getSequences(); - RandomAccessFile out = null; + while (seqIndex < sequences.size()) + { + Vector ids = new Vector(); - try{ - if (cache == null) - { - jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); - cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - } + for (int i = 0; (seqIndex < sequences.size()) && (i < 50); + seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + unknownSequences.add(sequence.getName()); + } + /////////////////////////////////// + ///READ FROM EBI + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + for (int i = 0; i < ids.size(); i++) + { + remainingIds.append(ids.get(i) + ";"); + } + EBIFetchClient ebi = new EBIFetchClient(); + File file = ebi.fetchDataAsFile(remainingIds.toString(), + "xml", null); - File test = new File(cache); - if( !test.exists() ) - { - out = new RandomAccessFile(cache, "rw"); - out.writeBytes("\n"); - out.writeBytes("\n"); + if (file != null) + { + ReadUniprotFile(file, align, ids); + } + } + } } - else + catch (Exception ex) { - out = new RandomAccessFile(cache, "rw"); - // open exisiting cache and remove from the end - long lastLine = 0; - String data; - while ( (data = out.readLine()) != null) - { - if (data.indexOf("") > -1) - lastLine = out.getFilePointer(); - - } - out.seek(lastLine); + ex.printStackTrace(); } - int seqIndex = 0; - Vector sequences = align.getSequences(); - - while (seqIndex < sequences.size()) + if (sbuffer.length() > 0) { - ArrayList ids = new ArrayList(); - for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) - { - SequenceI sequence = (SequenceI) sequences.get(seqIndex); - ids.add(sequence.getName()); - } - - tryLocalCacheFirst(ids, align); - - if (ids.size() > 0) - { - StringBuffer remainingIds = new StringBuffer("uniprot:"); - for (int i = 0; i < ids.size(); i++) - remainingIds.append(ids.get(i) + ";"); - - EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); - - if(result!=null) - ReadUniprotFile(result, out, align); - } - + output.setText( + "Your sequences have been matched to Uniprot. Some of the ids have been\n" + + "altered, most likely the start/end residue will have been updated.\n" + + "Save your alignment to maintain the updated id.\n\n" + + sbuffer.toString()); + Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); } - if (out != null) + if (unknownSequences.size() > 0) { - out.writeBytes("\n"); - out.close(); + new WSWUBlastClient(ap, align, unknownSequences); } - }catch(Exception ex){ex.printStackTrace();} + else + ((Alignment)align).featuresAdded = true; - ap.repaint(); - findMissingIds(align); - if(sbuffer.length()>0) - { - output.formatForOutput(); - outputFrame.setContentPane(output); - output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" - +"altered, most likely the start/end residue will have been updated.\n" - +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); - Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); + ap.repaint(); } - if(unknownSequences.size()>0) + /** + * DOCUMENT ME! + * + * @param result DOCUMENT ME! + * @param out DOCUMENT ME! + * @param align DOCUMENT ME! + */ + void ReadUniprotFile(File file, AlignmentI align, Vector ids) { - //ignore for now!!!!!!!!!! - // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); - } - -} + if(!file.exists()) + return; -void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) -{ - SequenceI sequence = null; - Vector features = null; - String type, description, status, start, end, pdb = null; + SequenceI sequence = null; + // String pdb = null; + Vector entries = getUniprotEntries(file); - for (int r = 0; r < result.length; r++) - { - if(sequence==null && result[r].indexOf("")>-1) + int i, iSize = entries==null?0:entries.size(); + UniprotEntry entry; + for (i = 0; i < iSize; i++) { - long filePointer = 0; - - if(out!=null) - try{ - filePointer=out.getFilePointer(); - out.writeBytes("\n"); - }catch(Exception ex){} + entry = (UniprotEntry) entries.elementAt(i); + String idmatch = entry.getAccession(); + sequence = align.findName(idmatch); - String seqName = parseElement( result[r], "" , out); - sequence = align.findName( seqName ) ; - if(sequence==null) + if (sequence == null) { - sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); - if(sequence!=null) - { - sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); - sequence.setName(seqName); - } + //Sequence maybe Name, not Accession + idmatch = entry.getName(); + sequence = align.findName(idmatch); } - if(sequence==null) + + if (sequence == null) { - sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); - sequence = align.findName( result[r] ) ; - - // this entry has been suggested by ebi. - // doesn't match id in alignment file - try { out.setLength(filePointer); } catch (Exception ex) {} - // now skip to next entry - while( result[r].indexOf("")==-1) - r++; + continue; } - features = new Vector(); - type=""; start="0"; end="0"; description=""; status=""; pdb=""; + ids.remove(sequence.getName()); + unknownSequences.remove(sequence.getName()); - } - - if(sequence==null) - continue; - - if( result[r].indexOf("-1) - { - pdb = parseValue( result[r], "value=" , out); - sequence.setPDBId(pdb); - } - - if(result[r].indexOf("feature type")>-1) - { - type = parseValue( result[r], "type=" , out); - description = parseValue( result[r], "description=" , null ); - status = parseValue ( result[r], "status=", null); - - while( result[r].indexOf("position")==-1) - { - r++; // - } - // r++; - if(result[r].indexOf("begin")>-1) - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[++r], "position=" , out); - } - else - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[r], "position=" , null); - } - int sstart = Integer.parseInt(start); - int eend = Integer.parseInt(end); - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - SequenceFeature sf = new SequenceFeature(type, - sstart, - eend, - description, - status); - features.add(sf); - } - - if(result[r].indexOf("-1) - { - StringBuffer seqString = new StringBuffer(); - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - while(result[++r].indexOf("")==-1) - { - seqString.append(result[r]); - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - } - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - StringBuffer nonGapped = new StringBuffer(); - for (int i = 0; i < sequence.getSequence().length(); i++) - { - if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) - nonGapped.append(sequence.getCharAt(i)); - } - - int absStart = seqString.toString().indexOf(nonGapped.toString()); - if(absStart==-1) - { - unknownSequences.add(sequence.getName()); - features = null; - sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); - continue; - } - - int absEnd = absStart + nonGapped.toString().length(); - absStart+=1; - - if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) - sbuffer.append("Updated: "+sequence.getName()+" "+ - sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); - - - sequence.setStart(absStart); - sequence.setEnd(absEnd); - - } - - if(result[r].indexOf("")>-1) - { - if(features!=null) - sequence.setSequenceFeatures( features ); - features = null; - sequence = null; - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - } - } -} + String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()); -void findMissingIds(AlignmentI align) -{ - String data; - ArrayList cachedIds = new ArrayList(); - - try - { - BufferedReader in = new BufferedReader( - new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + int absStart = entry.getUniprotSequence().getContent().indexOf( + nonGapped.toString()); - while ( (data = in.readLine()) != null) - { - if (data.indexOf("name") > -1) + if (absStart == -1) { - String name = parseElement(data, "", null); - cachedIds.add(name); - } - } - } - catch (Exception ex) - { ex.printStackTrace(); } + unknownSequences.add(sequence.getName()); + sbuffer.append(sequence.getName() + + " SEQUENCE NOT %100 MATCH \n"); - for(int i=0; i-1) + if ( (absStart != sequence.getStart()) || + (absEnd != sequence.getEnd())) { - String name = parseElement( data, "" , null) ; - if(ids.contains( name ) ) - { - cacheData.add(""); - cacheData.add(data); - while( data.indexOf("")==-1) - { - data = in.readLine(); - cacheData.add(data); - } - cacheData.add(data); - - ids.remove( name ); - } + sbuffer.append("Updated: " + sequence.getName() + " " + + sequence.getStart() + "/" + sequence.getEnd() + + " to " + absStart + "/" + absEnd + "\n"); } + + sequence.setSequenceFeatures(entry.getFeatures()); + sequence.setStart(absStart); + sequence.setEnd(absEnd); } } - catch(Exception ex){ex.printStackTrace();} - - String [] localData = new String[cacheData.size()]; - cacheData.toArray( localData ); - if(localData!=null && localData.length>0) - ReadUniprotFile(localData, null, align); -} - - -String parseValue(String line, String tag, RandomAccessFile out) -{ - if(out!=null) - try{ out.writeBytes(line+"\n"); }catch(Exception ex){} - - - int index = line.indexOf(tag)+tag.length()+1; - if(index==tag.length()) - return ""; - - return line.substring( index, line.indexOf("\"", index+1) ); } -String parseElement(String line, String tag, RandomAccessFile out) -{ - if (out != null) - try - { - out.writeBytes(line + "\n"); - } - catch (Exception ex) - {} - - int index = line.indexOf(tag) + tag.length(); - return line.substring(index, line.indexOf("