X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=0c82f6fb41cd813c1a9ae04d21bc5dacb7bbe5f3;hb=734927d0150e9d43593e5a2aea1fc06613a9d873;hp=b8300a059a5135d1dae592f12ce15e958e454ff8;hpb=99c58ee0ae2a848f982552e53feaf6d5cb9925e5;p=jalview.git
diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java
index b8300a0..0c82f6f 100755
--- a/src/jalview/io/SequenceFeatureFetcher.java
+++ b/src/jalview/io/SequenceFeatureFetcher.java
@@ -16,372 +16,548 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
-
package jalview.io;
-import java.io.*;
-import java.util.*;
-import javax.swing.*;
-import jalview.io.*;
-import jalview.gui.*;
import jalview.datamodel.*;
-public class SequenceFeatureFetcher implements Runnable
-{
- AlignmentI align;
- AlignmentPanel ap;
- ArrayList unknownSequences;
- CutAndPasteTransfer output = new CutAndPasteTransfer();
- StringBuffer sbuffer = new StringBuffer();
-
- public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
- {
- unknownSequences = new ArrayList();
- this.align = align;
- this.ap = ap;
- Thread thread = new Thread(this);
- thread.start();
- }
-
- public void run()
-{
-
- String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
+import jalview.gui.*;
- RandomAccessFile out = null;
+import jalview.io.*;
- try{
- if (cache == null)
- {
- jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
- cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
- }
+import java.io.*;
+import java.util.*;
- File test = new File(cache);
- if( !test.exists() )
- {
- out = new RandomAccessFile(cache, "rw");
- out.writeBytes("\n");
- out.writeBytes("\n");
- }
- else
+/**
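+ * Looks up an alignment's sequences in Uniprot (local cache first, then EBI) and applies returned features and start/end.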
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class SequenceFeatureFetcher implements Runnable
+{
+ AlignmentI align;
+ AlignmentPanel ap;
+ ArrayList unknownSequences;
+ CutAndPasteTransfer output = new CutAndPasteTransfer();
+ StringBuffer sbuffer = new StringBuffer();
+
+ /**
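+ * Creates a new SequenceFeatureFetcher object and starts it on a new thread.
+ *
+ * @param align alignment whose sequences are to be looked up
+ * @param ap alignment panel; stored in a field (no other use is visible in this listing)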
+ */
+ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
- out = new RandomAccessFile(cache, "rw");
- // open exisiting cache and remove from the end
- long lastLine = 0;
- String data;
- while ( (data = out.readLine()) != null)
- {
- if (data.indexOf("") > -1)
- lastLine = out.getFilePointer();
-
- }
- out.seek(lastLine);
- }
+ unknownSequences = new ArrayList();
+ this.align = align;
+ this.ap = ap;
- int seqIndex = 0;
- Vector sequences = align.getSequences();
+ Thread thread = new Thread(this);
+ thread.start();
+ }
- while (seqIndex < sequences.size())
+ /**
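+ * Thread body: fetches Uniprot data for the alignment's sequence names in batches of up to 50 and reports changes.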
+ */
+ public void run()
{
- ArrayList ids = new ArrayList();
- for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
- {
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);
- ids.add(sequence.getName());
- }
+ RandomAccessFile out = null;
- tryLocalCacheFirst(ids, align);
+ try
+ {
+ String cache = System.getProperty("user.home") +
+ "/.jalview.uniprot.xml";
+
+ File test = new File(cache);
+
+ if (!test.exists())
+ {
+ out = new RandomAccessFile(cache, "rw");
+ out.writeBytes("\n");
+ out.writeBytes("\n");
+ }
+ else
+ {
+ out = new RandomAccessFile(cache, "rw");
+
+ // open existing cache and seek to just after its last matching line, so that new data is written from there
+ long lastLine = 0;
+ String data;
+
+ while ((data = out.readLine()) != null)
+ {
+ if (data.indexOf("") > -1)
+ {
+ lastLine = out.getFilePointer();
+ }
+ }
+
+ out.seek(lastLine);
+ }
+
+ int seqIndex = 0;
+ Vector sequences = align.getSequences();
+
+ while (seqIndex < sequences.size())
+ {
+ ArrayList ids = new ArrayList();
+
+ for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
+ seqIndex++, i++)
+ {
+ SequenceI sequence = (SequenceI) sequences.get(seqIndex);
+ ids.add(sequence.getName());
+ }
+
+ tryLocalCacheFirst(ids, align);
+
+ if (ids.size() > 0)
+ {
+ StringBuffer remainingIds = new StringBuffer("uniprot:");
+
+ for (int i = 0; i < ids.size(); i++)
+ remainingIds.append(ids.get(i) + ";");
+
+ EBIFetchClient ebi = new EBIFetchClient();
+ String[] result = ebi.fetchData(remainingIds.toString(),
+ "xml", null);
+
+ if (result != null)
+ {
+ ReadUniprotFile(result, out, align);
+ }
+ }
+ }
+
+ if (out != null)
+ {
+ out.writeBytes("\n");
+ out.close();
+ }
+ }
+ catch (Exception ex)
+ {
+ ex.printStackTrace();
+ }
- if (ids.size() > 0)
- {
- StringBuffer remainingIds = new StringBuffer("uniprot:");
- for (int i = 0; i < ids.size(); i++)
- remainingIds.append(ids.get(i) + ";");
+ findMissingIds(align);
- EBIFetchClient ebi = new EBIFetchClient();
- String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
+ if (sbuffer.length() > 0)
+ {
+ output.setText(
+ "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
+ "altered, most likely the start/end residue will have been updated.\n" +
+ "Save your alignment to maintain the updated id.\n\n" +
+ sbuffer.toString());
+ Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
+ }
- if(result!=null)
- ReadUniprotFile(result, out, align);
- }
+ if (unknownSequences.size() > 0)
+ {
+ // ignored for now: the WSWUBlastClient call below is disabled
+ // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
+ }
+ jalview.gui.PaintRefresher.Refresh(null, align);
}
- if (out != null)
+ /**
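+ * Parses lines of Uniprot XML and applies the PDB id, features and start/end found there to matching sequences.
+ *
+ * @param result lines of Uniprot XML to parse
+ * @param out cache file the parsed lines are echoed to; null when nothing should be written
+ * @param align alignment in which sequences are found by name and updated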
+ */
+ void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align)
{
- out.writeBytes("\n");
- out.close();
+ SequenceI sequence = null;
+ Vector features = null;
+ String type;
+ String description;
+ String status;
+ String start;
+ String end;
+ String pdb = null;
+
+ for (int r = 0; r < result.length; r++)
+ {
+ if ((sequence == null) && (result[r].indexOf("") > -1))
+ {
+ long filePointer = 0;
+
+ if (out != null)
+ {
+ try
+ {
+ filePointer = out.getFilePointer();
+ out.writeBytes("\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+
+ String seqName = parseElement(result[r], "", out);
+ sequence = align.findName(seqName);
+
+ if (sequence == null)
+ {
+ sequence = align.findName(seqName.substring(0,
+ seqName.indexOf('_')));
+
+ if (sequence != null)
+ {
+ sbuffer.append("changing " + sequence.getName() +
+ " to " + seqName + "\n");
+ sequence.setName(seqName);
+ }
+ }
+
+ if (sequence == null)
+ {
+ sbuffer.append("UNIPROT updated suggestion is " +
+ result[r] + "\n");
+ sequence = align.findName(result[r]);
+
+ // this entry has been suggested by ebi.
+ // doesn't match id in alignment file
+ try
+ {
+ out.setLength(filePointer);
+ }
+ catch (Exception ex)
+ {
+ }
+
+ // now skip to next entry
+ while (result[r].indexOf("") == -1)
+ r++;
+ }
+
+ features = new Vector();
+ type = "";
+ start = "0";
+ end = "0";
+ description = "";
+ status = "";
+ pdb = "";
+ }
+
+ if (sequence == null)
+ {
+ continue;
+ }
+
+ if (result[r].indexOf(" -1)
+ {
+ pdb = parseValue(result[r], "value=", out);
+ sequence.setPDBId(pdb);
+ }
+
+ if (result[r].indexOf("feature type") > -1)
+ {
+ type = parseValue(result[r], "type=", out);
+ description = parseValue(result[r], "description=", null);
+ status = parseValue(result[r], "status=", null);
+
+ while (result[r].indexOf("position") == -1)
+ {
+ r++; //
+ }
+
+ // r++;
+ if (result[r].indexOf("begin") > -1)
+ {
+ start = parseValue(result[r], "position=", out);
+ end = parseValue(result[++r], "position=", out);
+ }
+ else
+ {
+ start = parseValue(result[r], "position=", out);
+ end = parseValue(result[r], "position=", null);
+ }
+
+ int sstart = Integer.parseInt(start);
+ int eend = Integer.parseInt(end);
+
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes("\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+
+ SequenceFeature sf = new SequenceFeature(type, sstart, eend,
+ description, status);
+ features.add(sf);
+ }
+
+ if (result[r].indexOf(" -1)
+ {
+ StringBuffer seqString = new StringBuffer();
+
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes(result[r] + "\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+
+ while (result[++r].indexOf("") == -1)
+ {
+ seqString.append(result[r]);
+
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes(result[r] + "\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+ }
+
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes(result[r] + "\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+
+ StringBuffer nonGapped = new StringBuffer();
+
+ for (int i = 0; i < sequence.getSequence().length(); i++)
+ {
+ if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
+ {
+ nonGapped.append(sequence.getCharAt(i));
+ }
+ }
+
+ int absStart = seqString.toString().indexOf(nonGapped.toString());
+
+ if (absStart == -1)
+ {
+ unknownSequences.add(sequence.getName());
+ features = null;
+ sbuffer.append(sequence.getName() +
+ " SEQUENCE NOT %100 MATCH \n");
+
+ continue;
+ }
+
+ int absEnd = absStart + nonGapped.toString().length();
+ absStart += 1;
+
+ if ((absStart != sequence.getStart()) ||
+ (absEnd != sequence.getEnd()))
+ {
+ sbuffer.append("Updated: " + sequence.getName() + " " +
+ sequence.getStart() + "/" + sequence.getEnd() +
+ " to " + absStart + "/" + absEnd + "\n");
+ }
+
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
+ }
+
+ if (result[r].indexOf("") > -1)
+ {
+ if (features != null)
+ {
+ sequence.setSequenceFeatures(features);
+ }
+
+ features = null;
+ sequence = null;
+
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes("\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
+ }
+ }
}
- }catch(Exception ex){ex.printStackTrace();}
-
- ap.repaint();
- findMissingIds(align);
- if(sbuffer.length()>0)
- {
- output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
- +"altered, most likely the start/end residue will have been updated.\n"
- +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
- Desktop.addInternalFrame(output, "Sequence names updated ", 600,300);
-
- }
- if(unknownSequences.size()>0)
- {
- //ignore for now!!!!!!!!!!
- // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
- }
-
-}
-
-void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
-{
- SequenceI sequence = null;
- Vector features = null;
- String type, description, status, start, end, pdb = null;
-
-
- for (int r = 0; r < result.length; r++)
- {
- if(sequence==null && result[r].indexOf("")>-1)
+ /**
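+ * Records in unknownSequences each sequence name absent from the UNIPROT_CACHE file (no-op if that property is unset).
+ *
+ * @param align alignment whose sequence names are checked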
+ */
+ void findMissingIds(AlignmentI align)
{
- long filePointer = 0;
-
- if(out!=null)
- try{
- filePointer=out.getFilePointer();
- out.writeBytes("\n");
- }catch(Exception ex){}
-
- String seqName = parseElement( result[r], "" , out);
- sequence = align.findName( seqName ) ;
- if(sequence==null)
- {
- sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
- if(sequence!=null)
+ String data;
+ ArrayList cachedIds = new ArrayList();
+
+ try
+ {
+ if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null)
+ return;
+
+ BufferedReader in = new BufferedReader(new FileReader(
+ jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
+
+ while ((data = in.readLine()) != null)
+ {
+ if (data.indexOf("name") > -1)
+ {
+ String name = parseElement(data, "", null);
+ cachedIds.add(name);
+ }
+ }
+ }
+ catch (Exception ex)
{
- sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
- sequence.setName(seqName);
+ ex.printStackTrace();
}
- }
- if(sequence==null)
- {
- sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
- sequence = align.findName( result[r] ) ;
-
- // this entry has been suggested by ebi.
- // doesn't match id in alignment file
- try { out.setLength(filePointer); } catch (Exception ex) {}
- // now skip to next entry
- while( result[r].indexOf("")==-1)
- r++;
- }
-
- features = new Vector();
- type=""; start="0"; end="0"; description=""; status=""; pdb="";
+ for (int i = 0; i < align.getHeight(); i++)
+ if (!cachedIds.contains(align.getSequenceAt(i).getName()))
+ {
+ unknownSequences.add(align.getSequenceAt(i).getName());
+ }
}
- if(sequence==null)
- continue;
-
- if( result[r].indexOf("-1)
- {
- pdb = parseValue( result[r], "value=" , out);
- sequence.setPDBId(pdb);
- }
-
- if(result[r].indexOf("feature type")>-1)
- {
- type = parseValue( result[r], "type=" , out);
- description = parseValue( result[r], "description=" , null );
- status = parseValue ( result[r], "status=", null);
-
- while( result[r].indexOf("position")==-1)
- {
- r++; //
- }
- // r++;
- if(result[r].indexOf("begin")>-1)
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[++r], "position=" , out);
- }
- else
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[r], "position=" , null);
- }
- int sstart = Integer.parseInt(start);
- int eend = Integer.parseInt(end);
- if(out!=null)
- try{ out.writeBytes("\n"); }catch(Exception ex){}
-
- SequenceFeature sf = new SequenceFeature(type,
- sstart,
- eend,
- description,
- status);
- features.add(sf);
- }
-
- if(result[r].indexOf("-1)
- {
- StringBuffer seqString = new StringBuffer();
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- while(result[++r].indexOf("")==-1)
- {
- seqString.append(result[r]);
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
- }
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- StringBuffer nonGapped = new StringBuffer();
- for (int i = 0; i < sequence.getSequence().length(); i++)
- {
- if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
- nonGapped.append(sequence.getCharAt(i));
- }
-
- int absStart = seqString.toString().indexOf(nonGapped.toString());
- if(absStart==-1)
- {
- unknownSequences.add(sequence.getName());
- features = null;
- sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
- continue;
- }
-
- int absEnd = absStart + nonGapped.toString().length();
- absStart+=1;
-
- if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
- sbuffer.append("Updated: "+sequence.getName()+" "+
- sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
-
-
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
-
- }
-
- if(result[r].indexOf("")>-1)
- {
- if(features!=null)
- sequence.setSequenceFeatures( features );
- features = null;
- sequence = null;
- if(out!=null)
- try{ out.writeBytes("\n"); }catch(Exception ex){}
-
- }
- }
-}
-
-void findMissingIds(AlignmentI align)
-{
- String data;
- ArrayList cachedIds = new ArrayList();
-
- try
- {
- BufferedReader in = new BufferedReader(
- new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
-
- while ( (data = in.readLine()) != null)
+ /**
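+ * Parses cached entries whose name is in ids via ReadUniprotFile and removes those names from ids.
+ *
+ * @param ids sequence names to look for; names found in the cache are removed from this list
+ * @param align alignment handed on to ReadUniprotFile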
+ */
+ void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
{
- if (data.indexOf("name") > -1)
- {
- String name = parseElement(data, "", null);
- cachedIds.add(name);
- }
- }
- }
- catch (Exception ex)
- { ex.printStackTrace(); }
-
- for(int i=0; i -1)
+ {
+ String name = parseElement(data, "", null);
+
+ if (ids.contains(name))
+ {
+ cacheData.add("");
+ cacheData.add(data);
+
+ while (data.indexOf("") == -1)
+ {
+ data = in.readLine();
+ cacheData.add(data);
+ }
+
+ cacheData.add(data);
+
+ ids.remove(name);
+ }
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ ex.printStackTrace();
+ }
-}
+ String[] localData = new String[cacheData.size()];
+ cacheData.toArray(localData);
-void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
-{
- ArrayList cacheData = new ArrayList();
- try{
- BufferedReader in = new BufferedReader(
- new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
-
- // read through cache file, if the cache has sequences we're looking for
- // add the lines to a new String array, Readthis new array and
- // make sure we remove the ids from the list to retrieve from EBI
- String data;
- while( ( data=in.readLine())!=null)
- {
- if(data.indexOf("name")>-1)
- {
- String name = parseElement( data, "" , null) ;
- if(ids.contains( name ) )
+ if ((localData != null) && (localData.length > 0))
{
- cacheData.add("");
- cacheData.add(data);
- while( data.indexOf("")==-1)
- {
- data = in.readLine();
- cacheData.add(data);
- }
- cacheData.add(data);
-
- ids.remove( name );
+ ReadUniprotFile(localData, null, align);
}
- }
}
- }
- catch(Exception ex){ex.printStackTrace();}
-
- String [] localData = new String[cacheData.size()];
- cacheData.toArray( localData );
- if(localData!=null && localData.length>0)
- ReadUniprotFile(localData, null, align);
-}
-
-
-String parseValue(String line, String tag, RandomAccessFile out)
-{
- if(out!=null)
- try{ out.writeBytes(line+"\n"); }catch(Exception ex){}
+ /**
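+ * Writes line to out (if non-null) and extracts the quoted value that follows tag.
+ *
+ * @param line line of text to search
+ * @param tag attribute prefix including the equals sign, e.g. "type="
+ * @param out file the line is first written to, or null to skip writing
+ *
+ * @return the text after tag and its opening quote, up to the next double quote; "" if tag is absent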
+ */
+ String parseValue(String line, String tag, RandomAccessFile out)
+ {
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes(line + "\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
- int index = line.indexOf(tag)+tag.length()+1;
- if(index==tag.length())
- return "";
+ int index = line.indexOf(tag) + tag.length() + 1;
- return line.substring( index, line.indexOf("\"", index+1) );
-}
+ if (index == tag.length())
+ {
+ return "";
+ }
+ return line.substring(index, line.indexOf("\"", index + 1));
+ }
-String parseElement(String line, String tag, RandomAccessFile out)
-{
- if (out != null)
- try
+ /**
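+ * Writes line to out (if non-null) and extracts the text that follows tag.
+ *
+ * @param line line of text to search
+ * @param tag marker text that immediately precedes the wanted content
+ * @param out file the line is first written to, or null to skip writing
+ *
+ * @return the part of line starting right after tag (end delimiter is illegible in this listing - TODO confirm)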
+ */
+ String parseElement(String line, String tag, RandomAccessFile out)
{
- out.writeBytes(line + "\n");
- }
- catch (Exception ex)
- {}
+ if (out != null)
+ {
+ try
+ {
+ out.writeBytes(line + "\n");
+ }
+ catch (Exception ex)
+ {
+ }
+ }
- int index = line.indexOf(tag) + tag.length();
- return line.substring(index, line.indexOf(""));
+ int index = line.indexOf(tag) + tag.length();
- }
+ return line.substring(index, line.indexOf(""));
+ }
}