X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=89d84b03b12b760a0474b4e40953beaf705aff3c;hb=fbec1b33d0fc169d72be059a5d2cf12b248270e1;hp=afe7b6e191db8614f5e1ac913966439a8453bc1e;hpb=b618755c67d798ace34ebd6d0f384e913e8574c9;p=jalview.git diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java index afe7b6e..89d84b0 100755 --- a/src/jalview/io/SequenceFeatureFetcher.java +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -1,370 +1,557 @@ +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ package jalview.io; -import java.io.*; -import java.util.*; -import javax.swing.*; -import jalview.io.*; -import jalview.gui.*; import jalview.datamodel.*; -public class SequenceFeatureFetcher implements Runnable -{ - AlignmentI align; - AlignmentPanel ap; - ArrayList unknownSequences; - JInternalFrame outputFrame = new JInternalFrame(); - CutAndPasteTransfer output = new CutAndPasteTransfer(); - StringBuffer sbuffer = new StringBuffer(); - - public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) - { - unknownSequences = new ArrayList(); - this.align = align; - this.ap = ap; - Thread thread = new Thread(this); - thread.start(); - } - - public void run() -{ - - String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); +import jalview.gui.*; - RandomAccessFile out = null; +import jalview.io.*; - try{ - if (cache == null) - { - jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); - cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); - } +import java.io.*; +import java.util.*; - File test = new File(cache); - if( !test.exists() ) - { - out = new RandomAccessFile(cache, "rw"); - out.writeBytes("\n"); - out.writeBytes("\n"); - } - else +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class SequenceFeatureFetcher implements Runnable +{ + AlignmentI align; + AlignmentPanel ap; + ArrayList unknownSequences; + CutAndPasteTransfer output = new CutAndPasteTransfer(); + StringBuffer sbuffer = new StringBuffer(); + + /** + * Creates a new SequenceFeatureFetcher object. + * + * @param align DOCUMENT ME! + * @param ap DOCUMENT ME! + */ + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { - out = new RandomAccessFile(cache, "rw"); - // open exisiting cache and remove from the end - long lastLine = 0; - String data; - while ( (data = out.readLine()) != null) - { - if (data.indexOf("") > -1) - lastLine = out.getFilePointer(); - - } - out.seek(lastLine); - } + unknownSequences = new ArrayList(); + this.align = align; + this.ap = ap; - int seqIndex = 0; - Vector sequences = align.getSequences(); + Thread thread = new Thread(this); + thread.start(); + } - while (seqIndex < sequences.size()) + /** + * DOCUMENT ME! + */ + public void run() { - ArrayList ids = new ArrayList(); - for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) - { - SequenceI sequence = (SequenceI) sequences.get(seqIndex); - ids.add(sequence.getName()); - } + RandomAccessFile out = null; - tryLocalCacheFirst(ids, align); + try + { + String cache = System.getProperty("user.home") + + "/.jalview.uniprot.xml"; + + File test = new File(cache); + + if (!test.exists()) + { + out = new RandomAccessFile(cache, "rw"); + out.writeBytes("\n"); + out.writeBytes("\n"); + } + else + { + out = new RandomAccessFile(cache, "rw"); + + // open exisiting cache and remove from the end + long lastLine = 0; + String data; + + while ((data = out.readLine()) != null) + { + if (data.indexOf("") > -1) + { + lastLine = out.getFilePointer(); + } + } + + out.seek(lastLine); + } + + int seqIndex = 0; + Vector sequences = align.getSequences(); + + while (seqIndex < sequences.size()) + { + ArrayList ids = new ArrayList(); + + for (int i = 0; (seqIndex < sequences.size()) && (i < 50); + seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + } + + tryLocalCacheFirst(ids, align); + + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + + for (int i = 0; i < ids.size(); i++) + remainingIds.append(ids.get(i) + ";"); + + EBIFetchClient ebi = new EBIFetchClient(); + String[] result = ebi.fetchData(remainingIds.toString(), + "xml", null); + + if (result != null) + { + ReadUniprotFile(result, out, align); + } + } + } + + if (out != null) + { + out.writeBytes("\n"); + out.close(); + } + } + catch (Exception ex) + { + ex.printStackTrace(); + } - if (ids.size() > 0) - { - StringBuffer remainingIds = new StringBuffer("uniprot:"); - for (int i = 0; i < ids.size(); i++) - remainingIds.append(ids.get(i) + ";"); + findMissingIds(align); - EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); + if (sbuffer.length() > 0) + { + output.setText( + "Your sequences have been matched to Uniprot. Some of the ids have been\n" + + "altered, most likely the start/end residue will have been updated.\n" + + "Save your alignment to maintain the updated id.\n\n" + + sbuffer.toString()); + Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300); + } - if(result!=null) - ReadUniprotFile(result, out, align); - } + if (unknownSequences.size() > 0) + { + //ignore for now!!!!!!!!!! + // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); + } + jalview.gui.PaintRefresher.Refresh(null, align); } - if (out != null) + /** + * DOCUMENT ME! + * + * @param result DOCUMENT ME! + * @param out DOCUMENT ME! + * @param align DOCUMENT ME! + */ + void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) { - out.writeBytes("\n"); - out.close(); + SequenceI sequence = null; + Vector features = null; + String type; + String description; + String status; + String start; + String end; + String pdb = null; + + for (int r = 0; r < result.length; r++) + { + if ((sequence == null) && (result[r].indexOf("") > -1)) + { + long filePointer = 0; + + if (out != null) + { + try + { + filePointer = out.getFilePointer(); + out.writeBytes("\n"); + } + catch (Exception ex) + { + } + } + + String seqName = parseElement(result[r], "", out); + sequence = align.findName(seqName); + + if (sequence == null) + { + sequence = align.findName(seqName.substring(0, + seqName.indexOf('_'))); + + if (sequence != null) + { + sbuffer.append("changing " + sequence.getName() + + " to " + seqName + "\n"); + sequence.setName(seqName); + } + } + + if (sequence == null) + { + sbuffer.append("UNIPROT updated suggestion is " + + result[r] + "\n"); + sequence = align.findName(result[r]); + + // this entry has been suggested by ebi. + // doesn't match id in alignment file + try + { + out.setLength(filePointer); + } + catch (Exception ex) + { + } + + // now skip to next entry + while (result[r].indexOf("") == -1) + r++; + } + + features = new Vector(); + type = ""; + start = "0"; + end = "0"; + description = ""; + status = ""; + pdb = ""; + } + + if (sequence == null) + { + continue; + } + + if (result[r].indexOf(" -1) + { + pdb = parseValue(result[r], "value=", out); + sequence.setPDBId(pdb); + } + + if (result[r].indexOf("feature type") > -1) + { + type = parseValue(result[r], "type=", out); + description = parseValue(result[r], "description=", null); + status = parseValue(result[r], "status=", null); + + while (result[r].indexOf("position") == -1) + { + r++; // + } + + // r++; + if (result[r].indexOf("begin") > -1) + { + start = parseValue(result[r], "position=", out); + end = parseValue(result[++r], "position=", out); + } + else + { + start = parseValue(result[r], "position=", out); + end = parseValue(result[r], "position=", null); + } + + int sstart = Integer.parseInt(start); + int eend = Integer.parseInt(end); + + if (out != null) + { + try + { + out.writeBytes("\n"); + } + catch (Exception ex) + { + } + } + + SequenceFeature sf = new SequenceFeature(type, sstart, eend, + description, status); + features.add(sf); + } + + if (result[r].indexOf(" -1) + { + StringBuffer seqString = new StringBuffer(); + + if (out != null) + { + try + { + out.writeBytes(result[r] + "\n"); + } + catch (Exception ex) + { + } + } + + while (result[++r].indexOf("") == -1) + { + seqString.append(result[r]); + + if (out != null) + { + try + { + out.writeBytes(result[r] + "\n"); + } + catch (Exception ex) + { + } + } + } + + if (out != null) + { + try + { + out.writeBytes(result[r] + "\n"); + } + catch (Exception ex) + { + } + } + + StringBuffer nonGapped = new StringBuffer(); + + for (int i = 0; i < sequence.getSequence().length(); i++) + { + if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) + { + nonGapped.append(sequence.getCharAt(i)); + } + } + + int absStart = seqString.toString().indexOf(nonGapped.toString()); + + if (absStart == -1) + { + unknownSequences.add(sequence.getName()); + features = null; + sbuffer.append(sequence.getName() + + " SEQUENCE NOT %100 MATCH \n"); + + continue; + } + + int absEnd = absStart + nonGapped.toString().length(); + absStart += 1; + + if ((absStart != sequence.getStart()) || + (absEnd != sequence.getEnd())) + { + sbuffer.append("Updated: " + sequence.getName() + " " + + sequence.getStart() + "/" + sequence.getEnd() + + " to " + absStart + "/" + absEnd + "\n"); + } + + sequence.setStart(absStart); + sequence.setEnd(absEnd); + } + + if (result[r].indexOf("") > -1) + { + if (features != null) + { + sequence.setSequenceFeatures(features); + } + + features = null; + sequence = null; + + if (out != null) + { + try + { + out.writeBytes("\n"); + } + catch (Exception ex) + { + } + } + } + } } - }catch(Exception ex){ex.printStackTrace();} - - ap.repaint(); - findMissingIds(align); - if(sbuffer.length()>0) - { - outputFrame.setContentPane(output); - output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" - +"altered, most likely the start/end residue will have been updated.\n" - +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); - Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); - - } - - if(unknownSequences.size()>0) - { - //ignore for now!!!!!!!!!! - // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); - } - -} - -void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) -{ - SequenceI sequence = null; - Vector features = null; - String type, description, status, start, end, pdb = null; - - for (int r = 0; r < result.length; r++) - { - if(sequence==null && result[r].indexOf("")>-1) + /** + * DOCUMENT ME! + * + * @param align DOCUMENT ME! + */ + void findMissingIds(AlignmentI align) { - long filePointer = 0; - - if(out!=null) - try{ - filePointer=out.getFilePointer(); - out.writeBytes("\n"); - }catch(Exception ex){} - - String seqName = parseElement( result[r], "" , out); - sequence = align.findName( seqName ) ; - if(sequence==null) - { - sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); - if(sequence!=null) + String data; + ArrayList cachedIds = new ArrayList(); + + try + { + BufferedReader in = new BufferedReader(new FileReader( + jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + + while ((data = in.readLine()) != null) + { + if (data.indexOf("name") > -1) + { + String name = parseElement(data, "", null); + cachedIds.add(name); + } + } + } + catch (Exception ex) { - sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); - sequence.setName(seqName); + ex.printStackTrace(); } - } - if(sequence==null) - { - sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); - sequence = align.findName( result[r] ) ; - - // this entry has been suggested by ebi. - // doesn't match id in alignment file - try { out.setLength(filePointer); } catch (Exception ex) {} - // now skip to next entry - while( result[r].indexOf("")==-1) - r++; - } - - features = new Vector(); - type=""; start="0"; end="0"; description=""; status=""; pdb=""; + for (int i = 0; i < align.getHeight(); i++) + if (!cachedIds.contains(align.getSequenceAt(i).getName())) + { + unknownSequences.add(align.getSequenceAt(i).getName()); + } } - if(sequence==null) - continue; - - if( result[r].indexOf("-1) - { - pdb = parseValue( result[r], "value=" , out); - sequence.setPDBId(pdb); - } - - if(result[r].indexOf("feature type")>-1) - { - type = parseValue( result[r], "type=" , out); - description = parseValue( result[r], "description=" , null ); - status = parseValue ( result[r], "status=", null); - - while( result[r].indexOf("position")==-1) - { - r++; // - } - // r++; - if(result[r].indexOf("begin")>-1) - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[++r], "position=" , out); - } - else - { - start = parseValue( result[r], "position=" , out); - end = parseValue( result[r], "position=" , null); - } - int sstart = Integer.parseInt(start); - int eend = Integer.parseInt(end); - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - SequenceFeature sf = new SequenceFeature(type, - sstart, - eend, - description, - status); - features.add(sf); - } - - if(result[r].indexOf("-1) - { - StringBuffer seqString = new StringBuffer(); - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - while(result[++r].indexOf("")==-1) - { - seqString.append(result[r]); - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - } - - if(out!=null) - try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} - - StringBuffer nonGapped = new StringBuffer(); - for (int i = 0; i < sequence.getSequence().length(); i++) - { - if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) - nonGapped.append(sequence.getCharAt(i)); - } - - int absStart = seqString.toString().indexOf(nonGapped.toString()); - if(absStart==-1) - { - unknownSequences.add(sequence.getName()); - features = null; - sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); - continue; - } - - int absEnd = absStart + nonGapped.toString().length(); - absStart+=1; - - if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) - sbuffer.append("Updated: "+sequence.getName()+" "+ - sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); - - - sequence.setStart(absStart); - sequence.setEnd(absEnd); - - } - - if(result[r].indexOf("")>-1) - { - if(features!=null) - sequence.setSequenceFeatures( features ); - features = null; - sequence = null; - if(out!=null) - try{ out.writeBytes("\n"); }catch(Exception ex){} - - } - } -} - -void findMissingIds(AlignmentI align) -{ - String data; - ArrayList cachedIds = new ArrayList(); - - try - { - BufferedReader in = new BufferedReader( - new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); - - while ( (data = in.readLine()) != null) + /** + * DOCUMENT ME! + * + * @param ids DOCUMENT ME! + * @param align DOCUMENT ME! + */ + void tryLocalCacheFirst(ArrayList ids, AlignmentI align) { - if (data.indexOf("name") > -1) - { - String name = parseElement(data, "", null); - cachedIds.add(name); - } - } - } - catch (Exception ex) - { ex.printStackTrace(); } - - for(int i=0; i -1) + { + String name = parseElement(data, "", null); + + if (ids.contains(name)) + { + cacheData.add(""); + cacheData.add(data); + + while (data.indexOf("") == -1) + { + data = in.readLine(); + cacheData.add(data); + } + + cacheData.add(data); + + ids.remove(name); + } + } + } + } + catch (Exception ex) + { + ex.printStackTrace(); + } -} + String[] localData = new String[cacheData.size()]; + cacheData.toArray(localData); -void tryLocalCacheFirst(ArrayList ids, AlignmentI align) -{ - ArrayList cacheData = new ArrayList(); - try{ - BufferedReader in = new BufferedReader( - new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); - - // read through cache file, if the cache has sequences we're looking for - // add the lines to a new String array, Readthis new array and - // make sure we remove the ids from the list to retrieve from EBI - String data; - while( ( data=in.readLine())!=null) - { - if(data.indexOf("name")>-1) - { - String name = parseElement( data, "" , null) ; - if(ids.contains( name ) ) + if ((localData != null) && (localData.length > 0)) { - cacheData.add(""); - cacheData.add(data); - while( data.indexOf("")==-1) - { - data = in.readLine(); - cacheData.add(data); - } - cacheData.add(data); - - ids.remove( name ); + ReadUniprotFile(localData, null, align); } - } } - } - catch(Exception ex){ex.printStackTrace();} - - String [] localData = new String[cacheData.size()]; - cacheData.toArray( localData ); - if(localData!=null && localData.length>0) - ReadUniprotFile(localData, null, align); -} - - -String parseValue(String line, String tag, RandomAccessFile out) -{ - if(out!=null) - try{ out.writeBytes(line+"\n"); }catch(Exception ex){} + /** + * DOCUMENT ME! + * + * @param line DOCUMENT ME! + * @param tag DOCUMENT ME! + * @param out DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + String parseValue(String line, String tag, RandomAccessFile out) + { + if (out != null) + { + try + { + out.writeBytes(line + "\n"); + } + catch (Exception ex) + { + } + } - int index = line.indexOf(tag)+tag.length()+1; - if(index==tag.length()) - return ""; + int index = line.indexOf(tag) + tag.length() + 1; - return line.substring( index, line.indexOf("\"", index+1) ); -} + if (index == tag.length()) + { + return ""; + } + return line.substring(index, line.indexOf("\"", index + 1)); + } -String parseElement(String line, String tag, RandomAccessFile out) -{ - if (out != null) - try + /** + * DOCUMENT ME! + * + * @param line DOCUMENT ME! + * @param tag DOCUMENT ME! + * @param out DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + String parseElement(String line, String tag, RandomAccessFile out) { - out.writeBytes(line + "\n"); - } - catch (Exception ex) - {} + if (out != null) + { + try + { + out.writeBytes(line + "\n"); + } + catch (Exception ex) + { + } + } - int index = line.indexOf(tag) + tag.length(); - return line.substring(index, line.indexOf("