package jalview.analysis;
import java.io.*;
import java.util.*;
import javax.swing.*;
import jalview.io.*;
import jalview.gui.*;
import jalview.datamodel.*;
public class SequenceFeatureFetcher implements Runnable
{
// Alignment whose sequences are looked up and updated by this fetcher.
AlignmentI align;
// Panel that is refreshed (RefreshPanels) once run() has finished fetching.
AlignmentPanel ap;
// Names of sequences that could not be matched; filled by ReadUniprotFile when
// the ungapped alignment sequence is not found inside the fetched sequence.
ArrayList unknownSequences;
// Frame used by run() to show the report of changed names/positions.
JInternalFrame outputFrame = new JInternalFrame();
// Text component placed inside outputFrame to display the report.
CutAndPasteTransfer output = new CutAndPasteTransfer(false);
// Accumulates the user-facing report lines; shown by run() only if non-empty.
StringBuffer sbuffer = new StringBuffer();
/**
 * Creates a fetcher for the given alignment and immediately starts the
 * fetch on a new thread (see {@link #run()}).
 *
 * @param align alignment whose sequences are to be annotated
 * @param ap panel to refresh when the fetch has completed
 */
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
    this.ap = ap;
    this.align = align;
    this.unknownSequences = new ArrayList();

    // The work is done in run(); construction returns without waiting for it.
    new Thread(this).start();
}
/**
 * Thread body: opens (or creates) the Uniprot cache file, then processes the
 * alignment's sequences in batches of up to 50 names. Each batch is first
 * offered to tryLocalCacheFirst; names still left in the batch afterwards are
 * requested through EBIFetchClient and the reply is handed to
 * ReadUniprotFile together with the open cache file. Afterwards the panel is
 * refreshed, findMissingIds is called, and any accumulated report text is
 * shown in an internal frame.
 *
 * The cache file is now closed in a finally block, so it is released even
 * when setup or fetching throws; previously an exception left it open.
 *
 * NOTE(review): the text written at the start and end of the cache file is
 * just "\n", and the marker searched for when re-opening an existing cache is
 * the empty string. These look like literals whose XML content was stripped
 * from the file; they are reproduced unchanged here - confirm against version
 * control.
 */
public void run()
{
    String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
    RandomAccessFile out = null;
    try
    {
        if (cache == null)
        {
            // No cache location configured: default to uniprot.xml in the user's home directory.
            jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
            cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
        }

        // Existence has to be tested before opening: "rw" mode creates the file.
        boolean cacheExists = new File(cache).exists();
        out = new RandomAccessFile(cache, "rw");
        if (!cacheExists)
        {
            // New cache: write the file preamble.
            out.writeBytes("\n");
            out.writeBytes("\n");
        }
        else
        {
            // Existing cache: move the write position to just after the last
            // line containing the marker, so new entries overwrite whatever
            // follows it.
            long lastLine = 0;
            String data;
            while ((data = out.readLine()) != null)
            {
                if (data.indexOf("") > -1)
                {
                    lastLine = out.getFilePointer();
                }
            }
            out.seek(lastLine);
        }

        Vector sequences = align.getSequences();
        int seqIndex = 0;
        while (seqIndex < sequences.size())
        {
            // Collect the names of the next batch (at most 50 sequences).
            ArrayList ids = new ArrayList();
            for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
            {
                SequenceI sequence = (SequenceI) sequences.get(seqIndex);
                ids.add(sequence.getName());
            }

            tryLocalCacheFirst(ids, align);

            // Whatever is still listed after the cache lookup is fetched remotely.
            if (ids.size() > 0)
            {
                StringBuffer remainingIds = new StringBuffer("uniprot:");
                for (int i = 0; i < ids.size(); i++)
                {
                    remainingIds.append(ids.get(i)).append(";");
                }
                EBIFetchClient ebi = new EBIFetchClient();
                String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
                if (result != null)
                {
                    ReadUniprotFile(result, out, align);
                }
            }
        }

        // Trailer is written only when every batch completed without an exception.
        out.writeBytes("\n");
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
    }
    finally
    {
        // Always release the cache file, including on the failure path.
        if (out != null)
        {
            try
            {
                out.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
    }

    ap.RefreshPanels();
    findMissingIds(align);

    if (sbuffer.length() > 0)
    {
        // Something was renamed or re-positioned: show the report to the user.
        output.formatForOutput();
        outputFrame.setContentPane(output);
        output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
                +"altered, most likely the start/end residue will have been updated.\n"
                +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
        Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300);
    }

    if (unknownSequences.size() > 0)
    {
        // Deliberately a no-op for now; a BLAST lookup was planned here:
        // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
    }
}
/**
 * Walks the lines of a fetched or cached record set and applies what it finds
 * to the matching sequences of the alignment.
 *
 * For each entry the sequence is looked up with align.findName; one
 * SequenceFeature is collected per "feature type" line; the PDB id is set;
 * and the sequence start/end are re-derived by locating the ungapped
 * alignment sequence inside the record's sequence text. Report lines for the
 * user are appended to sbuffer, and names whose residues are not found in the
 * record are added to unknownSequences.
 *
 * NOTE(review): several marker strings in this method are empty ("") and two
 * conditions (the PDB check and the sequence check) are syntactically
 * incomplete. This looks like XML markup was stripped from the file. As
 * written, indexOf("") > -1 is always true and indexOf("") == -1 is never
 * true, so the entry start/end tests and the skip loops do not behave as the
 * surrounding comments suggest. Restore the literals from version control.
 *
 * @param result lines of the record set
 * @param out cache file to which processed lines are echoed; callers pass
 *            null when reading from the cache, in which case the guarded
 *            writes are skipped
 * @param align alignment whose sequences are updated
 */
void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
{
// Sequence of the entry currently being processed; null between entries.
SequenceI sequence = null;
// Features collected for the current entry; null when they must be discarded.
Vector features = null;
String type, description, status, start, end, pdb = null;
for (int r = 0; r < result.length; r++)
{
// Start of an entry: only tested while no entry is in progress.
if(sequence==null && result[r].indexOf("")>-1)
{
// Remember where this entry begins in the cache so it can be rolled back
// (see setLength below) if it turns out not to match the alignment.
long filePointer = 0;
if(out!=null)
try{
filePointer=out.getFilePointer();
out.writeBytes("\n");
}catch(Exception ex){}
String seqName = parseElement( result[r], "" , out);
sequence = align.findName( seqName ) ;
if(sequence==null)
{
// Second attempt: look up the part of the name before the first '_' and, if
// that matches, rename the alignment sequence to the full name.
// NOTE(review): substring throws if seqName contains no '_' (indexOf == -1).
sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
if(sequence!=null)
{
sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
sequence.setName(seqName);
}
}
if(sequence==null)
{
sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
sequence = align.findName( result[r] ) ;
// this entry has been suggested by ebi.
// doesn't match id in alignment file
// Truncate the cache back to the start of this entry. When out is null the
// resulting NullPointerException is swallowed by the catch.
try { out.setLength(filePointer); } catch (Exception ex) {}
// now skip to next entry
// NOTE(review): no bounds check on r here.
while( result[r].indexOf("")==-1)
r++;
}
// Fresh per-entry state.
features = new Vector();
type=""; start="0"; end="0"; description=""; status=""; pdb="";
}
// Ignore lines until an entry has been matched to a sequence.
if(sequence==null)
continue;
// NOTE(review): the condition on the next line is truncated (unterminated
// string literal) and does not compile; judging by its body it tested for the
// line carrying the PDB id. The sequence-text check further down, before the
// StringBuffer seqString declaration, is truncated in the same way.
if( result[r].indexOf("-1)
{
pdb = parseValue( result[r], "value=" , out);
sequence.setPDBId(pdb);
}
if(result[r].indexOf("feature type")>-1)
{
type = parseValue( result[r], "type=" , out);
description = parseValue( result[r], "description=" , null );
status = parseValue ( result[r], "status=", null);
while( result[r].indexOf("position")==-1)
{
r++; //
}
// r++;
if(result[r].indexOf("begin")>-1)
{
start = parseValue( result[r], "position=" , out);
end = parseValue( result[++r], "position=" , out);
}
else
{
start = parseValue( result[r], "position=" , out);
end = parseValue( result[r], "position=" , null);
}
int sstart = Integer.parseInt(start);
int eend = Integer.parseInt(end);
if(out!=null)
try{ out.writeBytes("\n"); }catch(Exception ex){}
SequenceFeature sf = new SequenceFeature(type,
sstart,
eend,
description,
status);
features.add(sf);
}
if(result[r].indexOf("-1)
{
// Gather the record's sequence text, echoing every consumed line to the cache.
StringBuffer seqString = new StringBuffer();
if(out!=null)
try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
while(result[++r].indexOf("")==-1)
{
seqString.append(result[r]);
if(out!=null)
try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
}
if(out!=null)
try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
// Build a gap-free copy of the alignment sequence for the substring search.
StringBuffer nonGapped = new StringBuffer();
for (int i = 0; i < sequence.getSequence().length(); i++)
{
if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
nonGapped.append(sequence.getCharAt(i));
}
int absStart = seqString.toString().indexOf(nonGapped.toString());
if(absStart==-1)
{
// Residues not found in the record: remember the name, drop the collected
// features and report the mismatch.
unknownSequences.add(sequence.getName());
features = null;
sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
continue;
}
// absEnd is computed from the 0-based offset; absStart is then made 1-based.
int absEnd = absStart + nonGapped.toString().length();
absStart+=1;
// Report only when the positions actually change; they are set either way.
if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
sbuffer.append("Updated: "+sequence.getName()+" "+
sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
sequence.setStart(absStart);
sequence.setEnd(absEnd);
}
// End of entry: attach the collected features (unless discarded) and reset
// the per-entry state so the next entry start is recognised.
if(result[r].indexOf("")>-1)
{
if(features!=null)
sequence.setSequenceFeatures( features );
features = null;
sequence = null;
if(out!=null)
try{ out.writeBytes("\n"); }catch(Exception ex){}
}
}
}
/**
 * Reads the file named by the UNIPROT_CACHE property line by line and
 * collects, for every line containing "name", the value returned by
 * parseElement into cachedIds.
 *
 * NOTE(review): this method is damaged. The for-loop header after the catch
 * block is cut short, and the statements that follow it use variables (ids,
 * cacheData, in) that are not declared in the visible code. They appear to
 * belong to a second method - run() calls tryLocalCacheFirst(ids, align),
 * whose declaration is absent from the file. Text between the two seems to
 * have been lost; restore it from version control.
 *
 * @param align alignment to check against the cached names (its use is in
 *              the missing part of the method)
 */
void findMissingIds(AlignmentI align)
{
String data;
ArrayList cachedIds = new ArrayList();
try
{
// NOTE(review): this reader is never closed in the visible code.
BufferedReader in = new BufferedReader(
new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
while ( (data = in.readLine()) != null)
{
if (data.indexOf("name") > -1)
{
String name = parseElement(data, "", null);
cachedIds.add(name);
}
}
}
catch (Exception ex)
{ ex.printStackTrace(); }
// NOTE(review): truncated line - the loop condition and everything up to the
// body of a cache-reading loop in another method is missing here.
for(int i=0; i-1)
{
String name = parseElement( data, "" , null) ;
// A cached entry for one of the requested ids: copy its lines into cacheData
// and take the id off the list so it is not fetched again.
if(ids.contains( name ) )
{
cacheData.add("");
cacheData.add(data);
// NOTE(review): with an empty marker this loop never executes as written.
while( data.indexOf("")==-1)
{
data = in.readLine();
cacheData.add(data);
}
cacheData.add(data);
ids.remove( name );
}
}
}
}
catch(Exception ex){ex.printStackTrace();}
// Parse the lines gathered from the cache; out is null so nothing is written
// back to the cache file.
String [] localData = new String[cacheData.size()];
cacheData.toArray( localData );
if(localData!=null && localData.length>0)
ReadUniprotFile(localData, null, align);
}
/**
 * Returns the double-quoted value that follows {@code tag} on {@code line},
 * e.g. for the line {@code <feature type="chain">} and the tag {@code type=}
 * the result is {@code chain}. The line is first echoed to {@code out} when
 * one is supplied.
 *
 * The closing quote is searched for starting at the first character of the
 * value, so an empty value ({@code type=""}) yields {@code ""}. Previously
 * the search started one character later, skipped the closing quote of an
 * empty value and returned text up to the next quote on the line.
 *
 * @param line line to parse
 * @param tag attribute name including the trailing '=', without the quote
 * @param out cache file to echo the line to, or null to skip the echo
 * @return the value between the quotes, or "" when the tag is not on the line
 * @throws StringIndexOutOfBoundsException if the tag is present but no
 *         closing quote follows it
 */
String parseValue(String line, String tag, RandomAccessFile out)
{
    if (out != null)
    {
        try
        {
            out.writeBytes(line + "\n");
        }
        catch (IOException ex)
        {
            // Caching is best effort: report the failure and still parse the line.
            ex.printStackTrace();
        }
    }

    int tagAt = line.indexOf(tag);
    if (tagAt == -1)
    {
        return "";
    }

    // Skip the tag itself plus the opening quote that follows it.
    int valueStart = tagAt + tag.length() + 1;
    int valueEnd = line.indexOf('"', valueStart);
    return line.substring(valueStart, valueEnd);
}
/**
 * Returns the text content that follows the opening tag {@code tag} on
 * {@code line}, up to the start of the next closing tag, e.g. for the line
 * {@code <name>ABC_HUMAN</name>} and the tag {@code <name>} the result is
 * {@code ABC_HUMAN}. The line is first echoed to {@code out} when one is
 * supplied.
 *
 * The end of the content used to be computed as {@code line.indexOf("")},
 * which is always 0, so the method could only return "" or throw. The end is
 * now the first "</" after the tag, or the end of the line when there is
 * none.
 *
 * NOTE(review): the "</" delimiter is a reconstruction - the original literal
 * appears to have been stripped from the file; confirm against version
 * control.
 *
 * @param line line to parse
 * @param tag opening tag whose content is wanted
 * @param out cache file to echo the line to, or null to skip the echo
 * @return the element text, or "" when the tag is not on the line
 */
String parseElement(String line, String tag, RandomAccessFile out)
{
    if (out != null)
    {
        try
        {
            out.writeBytes(line + "\n");
        }
        catch (IOException ex)
        {
            // Caching is best effort: report the failure and still parse the line.
            ex.printStackTrace();
        }
    }

    int tagAt = line.indexOf(tag);
    if (tagAt == -1)
    {
        // Same convention as parseValue: a missing tag yields an empty result.
        return "";
    }

    int contentStart = tagAt + tag.length();
    int contentEnd = line.indexOf("</", contentStart);
    if (contentEnd == -1)
    {
        contentEnd = line.length();
    }
    return line.substring(contentStart, contentEnd);
}
}