X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=bb404d8310657e0e2e311a1cb50f764ace795c8d;hb=dcd8d3212242ef3bde7ff9d865f9dfb859407a30;hp=afe7b6e191db8614f5e1ac913966439a8453bc1e;hpb=b618755c67d798ace34ebd6d0f384e913e8574c9;p=jalview.git diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java index afe7b6e..bb404d8 100755 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -1,370 +1,417 @@ +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ package jalview.io; +import jalview.datamodel.*; + +import jalview.gui.*; + +import jalview.io.*; + import java.io.*; + import java.util.*; -import javax.swing.*; -import jalview.io.*; -import jalview.gui.*; -import jalview.datamodel.*; -public class SequenceFeatureFetcher implements Runnable -{ - AlignmentI align; - AlignmentPanel ap; - ArrayList unknownSequences; - JInternalFrame outputFrame = new JInternalFrame(); - CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - - public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) - { - unknownSequences = new ArrayList(); - this.align = align; - this.ap = ap; - Thread thread = new Thread(this); - thread.start(); - } - - public void run() -{ - - String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - - RandomAccessFile out = null; - - try{ - if (cache == null) - { - jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); - cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - } +public class SequenceFeatureFetcher implements Runnable { + AlignmentI align; + AlignmentPanel ap; + ArrayList unknownSequences; + CutAndPasteTransfer output = new CutAndPasteTransfer(); + StringBuffer sbuffer = new StringBuffer(); + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { + unknownSequences = new ArrayList(); + this.align = align; + this.ap = ap; - File test = new File(cache); - if( !test.exists() ) - { - out = new RandomAccessFile(cache, "rw"); - out.writeBytes("\n"); - out.writeBytes("\n"); - } - else - { - out = new RandomAccessFile(cache, "rw"); - // open exisiting cache and remove from the end - long lastLine = 0; - String data; - while ( (data = out.readLine()) != null) - { - if (data.indexOf("") > -1) - lastLine = out.getFilePointer(); - - } - out.seek(lastLine); + Thread thread = new Thread(this); + thread.start(); } - int seqIndex = 0; - Vector sequences = align.getSequences(); + public void run() { + String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - while (seqIndex < sequences.size()) - { - ArrayList ids = new ArrayList(); - for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) - { - SequenceI sequence = (SequenceI) sequences.get(seqIndex); - ids.add(sequence.getName()); - } + RandomAccessFile out = null; - tryLocalCacheFirst(ids, align); + try { + if (cache == null) { + jalview.bin.Cache.setProperty("UNIPROT_CACHE", + System.getProperty("user.home") + "/.jalview.uniprot.xml"); + cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); + } - if (ids.size() > 0) - { - StringBuffer remainingIds = new StringBuffer("uniprot:"); - for (int i = 0; i < ids.size(); i++) - remainingIds.append(ids.get(i) + ";"); + File test = new File(cache); - EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); + if (!test.exists()) { + out = new RandomAccessFile(cache, "rw"); + out.writeBytes("\n"); + out.writeBytes("\n"); + } else { + out = new RandomAccessFile(cache, "rw"); - if(result!=null) - ReadUniprotFile(result, out, align); - } + // open exisiting cache and remove from the end + long lastLine = 0; + String data; - } + while ((data = out.readLine()) != null) { + if (data.indexOf("") > -1) { + lastLine = out.getFilePointer(); + } + } - if (out != null) - { - out.writeBytes("\n"); - out.close(); - } - }catch(Exception ex){ex.printStackTrace();} - - ap.repaint(); - findMissingIds(align); - if(sbuffer.length()>0) - { - outputFrame.setContentPane(output); - output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" - +"altered, most likely the start/end residue will have been updated.\n" - +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); - Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); - - } - - if(unknownSequences.size()>0) - { - //ignore for now!!!!!!!!!! - // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); - } + out.seek(lastLine); + } -} + int seqIndex = 0; + Vector sequences = align.getSequences(); + + while (seqIndex < sequences.size()) { + ArrayList ids = new ArrayList(); + + for (int i = 0; (seqIndex < sequences.size()) && (i < 50); + seqIndex++, i++) { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + } + + tryLocalCacheFirst(ids, align); + + if (ids.size() > 0) { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + + for (int i = 0; i < ids.size(); i++) + remainingIds.append(ids.get(i) + ";"); + + EBIFetchClient ebi = new EBIFetchClient(); + String[] result = ebi.fetchData(remainingIds.toString(), + "xml", null); + + if (result != null) { + ReadUniprotFile(result, out, align); + } + } + } + + if (out != null) { + out.writeBytes("\n"); + out.close(); + } + } catch (Exception ex) { + ex.printStackTrace(); + } + + jalview.gui.PaintRefresher.Refresh(null, align); + findMissingIds(align); -void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) -{ - SequenceI sequence = null; - Vector features = null; - String type, description, status, start, end, pdb = null; - - - for (int r = 0; r < result.length; r++) - { - if(sequence==null && result[r].indexOf("")>-1) - { - long filePointer = 0; - - if(out!=null) - try{ - filePointer=out.getFilePointer(); - out.writeBytes("\n"); - }catch(Exception ex){} - - String seqName = parseElement( result[r], "" , out); - sequence = align.findName( seqName ) ; - if(sequence==null) - { - sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); - if(sequence!=null) - { - sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); - sequence.setName(seqName); + if (sbuffer.length() > 0) { + output.setText( + "Your sequences have been matched to Uniprot. Some of the ids have been\n" + + "altered, most likely the start/end residue will have been updated.\n" + + "Save your alignment to maintain the updated id.\n\n" + + sbuffer.toString()); + Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); } - } - if(sequence==null) - { - sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); - sequence = align.findName( result[r] ) ; - - // this entry has been suggested by ebi. - // doesn't match id in alignment file - try { out.setLength(filePointer); } catch (Exception ex) {} - // now skip to next entry - while( result[r].indexOf("")==-1) - r++; - } - - features = new Vector(); - type=""; start="0"; end="0"; description=""; status=""; pdb=""; + if (unknownSequences.size() > 0) { + //ignore for now!!!!!!!!!! + // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); + } } - if(sequence==null) - continue; - - if( result[r].indexOf("-1) - { - pdb = parseValue( result[r], "value=" , out); - sequence.setPDBId(pdb); - } - - if(result[r].indexOf("feature type")>-1) - { - type = parseValue( result[r], "type=" , out); - description = parseValue( result[r], "description=" , null ); - status = parseValue ( result[r], "status=", null); - - while( result[r].indexOf("position")==-1) - { - r++; // - } - // r++; - if(result[r].indexOf("begin")>-1) - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[++r], "position=" , out); - } - else - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[r], "position=" , null); - } - int sstart = Integer.parseInt(start); - int eend = Integer.parseInt(end); - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - SequenceFeature sf = new SequenceFeature(type, - sstart, - eend, - description, - status); - features.add(sf); - } - - if(result[r].indexOf("-1) - { - StringBuffer seqString = new StringBuffer(); - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - while(result[++r].indexOf("")==-1) - { - seqString.append(result[r]); - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - } - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - StringBuffer nonGapped = new StringBuffer(); - for (int i = 0; i < sequence.getSequence().length(); i++) - { - if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) - nonGapped.append(sequence.getCharAt(i)); - } - - int absStart = seqString.toString().indexOf(nonGapped.toString()); - if(absStart==-1) - { - unknownSequences.add(sequence.getName()); - features = null; - sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); - continue; - } - - int absEnd = absStart + nonGapped.toString().length(); - absStart+=1; - - if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) - sbuffer.append("Updated: "+sequence.getName()+" "+ - sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); - - - sequence.setStart(absStart); - sequence.setEnd(absEnd); - - } - - if(result[r].indexOf("")>-1) - { - if(features!=null) - sequence.setSequenceFeatures( features ); - features = null; - sequence = null; - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - } - } -} + void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) { + SequenceI sequence = null; + Vector features = null; + String type; + String description; + String status; + String start; + String end; + String pdb = null; + + for (int r = 0; r < result.length; r++) { + if ((sequence == null) && (result[r].indexOf("") > -1)) { + long filePointer = 0; + + if (out != null) { + try { + filePointer = out.getFilePointer(); + out.writeBytes("\n"); + } catch (Exception ex) { + } + } + + String seqName = parseElement(result[r], "", out); + sequence = align.findName(seqName); + + if (sequence == null) { + sequence = align.findName(seqName.substring(0, + seqName.indexOf('_'))); + + if (sequence != null) { + sbuffer.append("changing " + sequence.getName() + + " to " + seqName + "\n"); + sequence.setName(seqName); + } + } + + if (sequence == null) { + sbuffer.append("UNIPROT updated suggestion is " + + result[r] + "\n"); + sequence = align.findName(result[r]); + + // this entry has been suggested by ebi. + // doesn't match id in alignment file + try { + out.setLength(filePointer); + } catch (Exception ex) { + } + + // now skip to next entry + while (result[r].indexOf("") == -1) + r++; + } + + features = new Vector(); + type = ""; + start = "0"; + end = "0"; + description = ""; + status = ""; + pdb = ""; + } + + if (sequence == null) { + continue; + } + + if (result[r].indexOf(" -1) { + pdb = parseValue(result[r], "value=", out); + sequence.setPDBId(pdb); + } + + if (result[r].indexOf("feature type") > -1) { + type = parseValue(result[r], "type=", out); + description = parseValue(result[r], "description=", null); + status = parseValue(result[r], "status=", null); + + while (result[r].indexOf("position") == -1) { + r++; // + } + + // r++; + if (result[r].indexOf("begin") > -1) { + start = parseValue(result[r], "position=", out); + end = parseValue(result[++r], "position=", out); + } else { + start = parseValue(result[r], "position=", out); + end = parseValue(result[r], "position=", null); + } + + int sstart = Integer.parseInt(start); + int eend = Integer.parseInt(end); + + if (out != null) { + try { + out.writeBytes("\n"); + } catch (Exception ex) { + } + } + + SequenceFeature sf = new SequenceFeature(type, sstart, eend, + description, status); + features.add(sf); + } + + if (result[r].indexOf(" -1) { + StringBuffer seqString = new StringBuffer(); + + if (out != null) { + try { + out.writeBytes(result[r] + "\n"); + } catch (Exception ex) { + } + } + + while (result[++r].indexOf("") == -1) { + seqString.append(result[r]); + + if (out != null) { + try { + out.writeBytes(result[r] + "\n"); + } catch (Exception ex) { + } + } + } + + if (out != null) { + try { + out.writeBytes(result[r] + "\n"); + } catch (Exception ex) { + } + } + + StringBuffer nonGapped = new StringBuffer(); + + for (int i = 0; i < sequence.getSequence().length(); i++) { + if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) { + nonGapped.append(sequence.getCharAt(i)); + } + } + + int absStart = seqString.toString().indexOf(nonGapped.toString()); + + if (absStart == -1) { + unknownSequences.add(sequence.getName()); + features = null; + sbuffer.append(sequence.getName() + + " SEQUENCE NOT %100 MATCH \n"); + + continue; + } + + int absEnd = absStart + nonGapped.toString().length(); + absStart += 1; + + if ((absStart != sequence.getStart()) || + (absEnd != sequence.getEnd())) { + sbuffer.append("Updated: " + sequence.getName() + " " + + sequence.getStart() + "/" + sequence.getEnd() + + " to " + absStart + "/" + absEnd + "\n"); + } + + sequence.setStart(absStart); + sequence.setEnd(absEnd); + } + + if (result[r].indexOf("") > -1) { + if (features != null) { + sequence.setSequenceFeatures(features); + } + + features = null; + sequence = null; + + if (out != null) { + try { + out.writeBytes("\n"); + } catch (Exception ex) { + } + } + } + } + } -void findMissingIds(AlignmentI align) -{ - String data; - ArrayList cachedIds = new ArrayList(); - - try - { - BufferedReader in = new BufferedReader( - new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); - - while ( (data = in.readLine()) != null) - { - if (data.indexOf("name") > -1) - { - String name = parseElement(data, "", null); - cachedIds.add(name); - } + void findMissingIds(AlignmentI align) { + String data; + ArrayList cachedIds = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader( + jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + + while ((data = in.readLine()) != null) { + if (data.indexOf("name") > -1) { + String name = parseElement(data, "", null); + cachedIds.add(name); + } + } + } catch (Exception ex) { + ex.printStackTrace(); + } + + for (int i = 0; i < align.getHeight(); i++) + if (!cachedIds.contains(align.getSequenceAt(i).getName())) { + unknownSequences.add(align.getSequenceAt(i).getName()); + } } - } - catch (Exception ex) - { ex.printStackTrace(); } - for(int i=0; i-1) - { - String name = parseElement( data, "" , null) ; - if(ids.contains( name ) ) - { - cacheData.add(""); - cacheData.add(data); - while( data.indexOf("")==-1) - { - data = in.readLine(); - cacheData.add(data); - } - cacheData.add(data); - - ids.remove( name ); - } - } - } - } - catch(Exception ex){ex.printStackTrace();} + while ((data = in.readLine()) != null) { + if (data.indexOf("name") > -1) { + String name = parseElement(data, "", null); - String [] localData = new String[cacheData.size()]; - cacheData.toArray( localData ); - if(localData!=null && localData.length>0) - ReadUniprotFile(localData, null, align); -} + if (ids.contains(name)) { + cacheData.add(""); + cacheData.add(data); + while (data.indexOf("") == -1) { + data = in.readLine(); + cacheData.add(data); + } -String parseValue(String line, String tag, RandomAccessFile out) -{ - if(out!=null) - try{ out.writeBytes(line+"\n"); }catch(Exception ex){} + cacheData.add(data); + ids.remove(name); + } + } + } + } catch (Exception ex) { + ex.printStackTrace(); + } - int index = line.indexOf(tag)+tag.length()+1; - if(index==tag.length()) - return ""; + String[] localData = new String[cacheData.size()]; + cacheData.toArray(localData); - return line.substring( index, line.indexOf("\"", index+1) ); -} + if ((localData != null) && (localData.length > 0)) { + ReadUniprotFile(localData, null, align); + } + } + + String parseValue(String line, String tag, RandomAccessFile out) { + if (out != null) { + try { + out.writeBytes(line + "\n"); + } catch (Exception ex) { + } + } + int index = line.indexOf(tag) + tag.length() + 1; -String parseElement(String line, String tag, RandomAccessFile out) -{ - if (out != null) - try - { - out.writeBytes(line + "\n"); + if (index == tag.length()) { + return ""; + } + + return line.substring(index, line.indexOf("\"", index + 1)); } - catch (Exception ex) - {} - int index = line.indexOf(tag) + tag.length(); - return line.substring(index, line.indexOf("