X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FSequenceFeatureFetcher.java;h=2c0aaac9d15fbcd82f26c2987105286679a1c486;hb=ebb52a433edbbdbd31ab82cbb1c59fc116a72b1d;hp=dc8a3cdb460e61629cdcdab846f9f32a9f8b82f3;hpb=44b659b39011c69e40b43610c4a9b94501fd1bd5;p=jalview.git diff --git a/src/jalview/analysis/SequenceFeatureFetcher.java b/src/jalview/analysis/SequenceFeatureFetcher.java index dc8a3cd..2c0aaac 100755 --- a/src/jalview/analysis/SequenceFeatureFetcher.java +++ b/src/jalview/analysis/SequenceFeatureFetcher.java @@ -2,6 +2,7 @@ package jalview.analysis; import java.io.*; import java.util.*; +import javax.swing.*; import jalview.io.*; import jalview.gui.*; import jalview.datamodel.*; @@ -10,9 +11,14 @@ public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentPanel ap; + ArrayList unknownSequences; + JInternalFrame outputFrame = new JInternalFrame(); + CutAndPasteTransfer output = new CutAndPasteTransfer(false); + StringBuffer sbuffer = new StringBuffer(); public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { + unknownSequences = new ArrayList(); this.align = align; this.ap = ap; Thread thread = new Thread(this); @@ -29,7 +35,8 @@ public class SequenceFeatureFetcher implements Runnable try{ if (cache == null) { - jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home/uniprot.xml")); + jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); + cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); } @@ -77,7 +84,7 @@ public class SequenceFeatureFetcher implements Runnable remainingIds.append(ids.get(i) + ";"); EBIFetchClient ebi = new EBIFetchClient(); - String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); + String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); if(result!=null) ReadUniprotFile(result, out, align); @@ -92,6 +99,24 @@ public class SequenceFeatureFetcher implements Runnable } }catch(Exception ex){ex.printStackTrace();} + ap.repaint(); + findMissingIds(align); + if(sbuffer.length()>0) + { + output.formatForOutput(); + outputFrame.setContentPane(output); + output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" + +"altered, most likely the start/end residue will have been updated.\n" + +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); + Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); + + } + + if(unknownSequences.size()>0) + { + //ignore for now!!!!!!!!!! + // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); + } } @@ -114,10 +139,21 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) out.writeBytes("\n"); }catch(Exception ex){} - sequence = align.findName( parseElement( result[r], "" , out)) ; + String seqName = parseElement( result[r], "" , out); + sequence = align.findName( seqName ) ; if(sequence==null) { - System.out.println("Couldnt find sequence id. Suggestion is "+result[r]); + sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); + if(sequence!=null) + { + sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); + sequence.setName(seqName); + } + } + if(sequence==null) + { + sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); + sequence = align.findName( result[r] ) ; // this entry has been suggested by ebi. // doesn't match id in alignment file @@ -167,16 +203,58 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) if(out!=null) try{ out.writeBytes("\n"); }catch(Exception ex){} - - if(sstart>=sequence.getStart() && eend<=sequence.getEnd()) - { SequenceFeature sf = new SequenceFeature(type, sstart, eend, description, status); features.add(sf); + } + + if(result[r].indexOf("-1) + { + StringBuffer seqString = new StringBuffer(); + + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + + while(result[++r].indexOf("")==-1) + { + seqString.append(result[r]); + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + } + + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + + StringBuffer nonGapped = new StringBuffer(); + for (int i = 0; i < sequence.getSequence().length(); i++) + { + if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) + nonGapped.append(sequence.getCharAt(i)); + } + + int absStart = seqString.toString().indexOf(nonGapped.toString()); + if(absStart==-1) + { + unknownSequences.add(sequence.getName()); + features = null; + sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); + continue; } + + int absEnd = absStart + nonGapped.toString().length(); + absStart+=1; + + if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) + sbuffer.append("Updated: "+sequence.getName()+" "+ + sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); + + + sequence.setStart(absStart); + sequence.setEnd(absEnd); + } if(result[r].indexOf("")>-1) @@ -190,8 +268,34 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) } } +} + +void findMissingIds(AlignmentI align) +{ + String data; + ArrayList cachedIds = new ArrayList(); + + try + { + BufferedReader in = new BufferedReader( + new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + + while ( (data = in.readLine()) != null) + { + if (data.indexOf("name") > -1) + { + String name = parseElement(data, "", null); + cachedIds.add(name); + } + } + } + catch (Exception ex) + { ex.printStackTrace(); } + + for(int i=0; i