X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FSequenceFeatureFetcher.java;h=2c0aaac9d15fbcd82f26c2987105286679a1c486;hb=ebb52a433edbbdbd31ab82cbb1c59fc116a72b1d;hp=dc8a3cdb460e61629cdcdab846f9f32a9f8b82f3;hpb=44b659b39011c69e40b43610c4a9b94501fd1bd5;p=jalview.git
diff --git a/src/jalview/analysis/SequenceFeatureFetcher.java b/src/jalview/analysis/SequenceFeatureFetcher.java
index dc8a3cd..2c0aaac 100755
--- a/src/jalview/analysis/SequenceFeatureFetcher.java
+++ b/src/jalview/analysis/SequenceFeatureFetcher.java
@@ -2,6 +2,7 @@ package jalview.analysis;
import java.io.*;
import java.util.*;
+import javax.swing.*;
import jalview.io.*;
import jalview.gui.*;
import jalview.datamodel.*;
@@ -10,9 +11,14 @@ public class SequenceFeatureFetcher implements Runnable
{
AlignmentI align;
AlignmentPanel ap;
+ ArrayList unknownSequences;
+ JInternalFrame outputFrame = new JInternalFrame();
+ CutAndPasteTransfer output = new CutAndPasteTransfer(false);
+ StringBuffer sbuffer = new StringBuffer();
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
+ unknownSequences = new ArrayList();
this.align = align;
this.ap = ap;
Thread thread = new Thread(this);
@@ -29,7 +35,8 @@ public class SequenceFeatureFetcher implements Runnable
try{
if (cache == null)
{
- jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home/uniprot.xml"));
+ jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
+ cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
}
@@ -77,7 +84,7 @@ public class SequenceFeatureFetcher implements Runnable
remainingIds.append(ids.get(i) + ";");
EBIFetchClient ebi = new EBIFetchClient();
- String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
+ String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
if(result!=null)
ReadUniprotFile(result, out, align);
@@ -92,6 +99,24 @@ public class SequenceFeatureFetcher implements Runnable
}
}catch(Exception ex){ex.printStackTrace();}
+ ap.repaint();
+ findMissingIds(align);
+ if(sbuffer.length()>0)
+ {
+ output.formatForOutput();
+ outputFrame.setContentPane(output);
+ output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
+ +"altered, most likely the start/end residue will have been updated.\n"
+ +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
+ Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300);
+
+ }
+
+ if(unknownSequences.size()>0)
+ {
+ //ignore for now!!!!!!!!!!
+ // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
+ }
}
@@ -114,10 +139,21 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
out.writeBytes("\n");
}catch(Exception ex){}
- sequence = align.findName( parseElement( result[r], "" , out)) ;
+ String seqName = parseElement( result[r], "" , out);
+ sequence = align.findName( seqName ) ;
if(sequence==null)
{
- System.out.println("Couldnt find sequence id. Suggestion is "+result[r]);
+ sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
+ if(sequence!=null)
+ {
+ sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
+ sequence.setName(seqName);
+ }
+ }
+ if(sequence==null)
+ {
+ sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
+ sequence = align.findName( result[r] ) ;
// this entry has been suggested by ebi.
// doesn't match id in alignment file
@@ -167,16 +203,58 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
if(out!=null)
try{ out.writeBytes("\n"); }catch(Exception ex){}
-
- if(sstart>=sequence.getStart() && eend<=sequence.getEnd())
- {
SequenceFeature sf = new SequenceFeature(type,
sstart,
eend,
description,
status);
features.add(sf);
+ }
+
+ if(result[r].indexOf("-1)
+ {
+ StringBuffer seqString = new StringBuffer();
+
+ if(out!=null)
+ try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
+
+ while(result[++r].indexOf("")==-1)
+ {
+ seqString.append(result[r]);
+ if(out!=null)
+ try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
+ }
+
+ if(out!=null)
+ try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
+
+ StringBuffer nonGapped = new StringBuffer();
+ for (int i = 0; i < sequence.getSequence().length(); i++)
+ {
+ if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
+ nonGapped.append(sequence.getCharAt(i));
+ }
+
+ int absStart = seqString.toString().indexOf(nonGapped.toString());
+ if(absStart==-1)
+ {
+ unknownSequences.add(sequence.getName());
+ features = null;
+ sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
+ continue;
}
+
+ int absEnd = absStart + nonGapped.toString().length();
+ absStart+=1;
+
+ if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
+ sbuffer.append("Updated: "+sequence.getName()+" "+
+ sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
+
+
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
+
}
if(result[r].indexOf("")>-1)
@@ -190,8 +268,34 @@ void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
}
}
+}
+
+void findMissingIds(AlignmentI align)
+{
+ String data;
+ ArrayList cachedIds = new ArrayList();
+
+ try
+ {
+ BufferedReader in = new BufferedReader(
+ new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
+
+ while ( (data = in.readLine()) != null)
+ {
+ if (data.indexOf("name") > -1)
+ {
+ String name = parseElement(data, "", null);
+ cachedIds.add(name);
+ }
+ }
+ }
+ catch (Exception ex)
+ { ex.printStackTrace(); }
+
+ for(int i=0; i