package jalview.analysis;
import java.io.*;
import java.util.*;
import jalview.io.*;
import jalview.gui.*;
import jalview.datamodel.*;
public class SequenceFeatureFetcher implements Runnable
{
/** Alignment whose sequences are looked up by name and annotated. */
AlignmentI align;

/** Panel whose RefreshPanels method is called after records have been read. */
AlignmentPanel ap;

/**
 * Stores the alignment and panel, then creates and starts a new thread that
 * executes {@link #run()} on this object. The thread is started before the
 * constructor returns.
 *
 * @param align alignment to fetch data for
 * @param ap    panel to refresh once data has been applied
 */
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
  this.ap = ap;
  this.align = align;
  new Thread(this).start();
}
/**
 * Fetches data for every sequence of the alignment, working in batches.
 *
 * The cache file path is taken from the UNIPROT_CACHE property; when the
 * property is unset it is set to uniprot.xml under the user home directory.
 * Sequence names are collected in batches of at most 50. Each batch is first
 * handed to {@link #tryLocalCacheFirst}, which removes the names it could
 * serve; the remaining names are requested through EBIFetchClient and any
 * returned lines are passed to {@link #ReadUniprotFile} together with the
 * open cache file.
 *
 * Any exception is printed and ends the run. The cache file is closed in
 * every case, including when an exception interrupts the fetch.
 */
public void run()
{
  String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
  RandomAccessFile out = null;
  try
  {
    if (cache == null)
    {
      jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
      cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
    }

    // Must be tested before the file is opened: opening in "rw" mode
    // creates the file when it is missing.
    boolean cacheExists = new File(cache).exists();
    out = new RandomAccessFile(cache, "rw");

    if (!cacheExists)
    {
      // NOTE(review): these two writes (and the closing write further down)
      // emit only a newline; presumably header/footer text for the cache
      // file is expected here - confirm against the original source.
      out.writeBytes("\n");
      out.writeBytes("\n");
    }
    else
    {
      // Existing cache: find the offset just after the last matching line
      // and continue writing from there.
      // NOTE(review): the search string below is empty, and an empty string
      // is found in every line, so the offset always ends up at the end of
      // the file; the marker text appears to be missing - confirm.
      long lastLine = 0;
      String data;
      while ((data = out.readLine()) != null)
      {
        if (data.indexOf("") > -1)
        {
          lastLine = out.getFilePointer();
        }
      }
      out.seek(lastLine);
    }

    Vector sequences = align.getSequences();
    int seqIndex = 0;
    while (seqIndex < sequences.size())
    {
      // Collect the names of the next batch (at most 50 sequences).
      ArrayList ids = new ArrayList();
      for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
      {
        SequenceI sequence = (SequenceI) sequences.get(seqIndex);
        ids.add(sequence.getName());
      }

      // Names served from the local cache are removed from ids.
      tryLocalCacheFirst(ids, align);

      if (ids.size() > 0)
      {
        StringBuffer remainingIds = new StringBuffer("uniprot:");
        for (int i = 0; i < ids.size(); i++)
        {
          remainingIds.append(ids.get(i)).append(";");
        }
        EBIFetchClient ebi = new EBIFetchClient();
        String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
        if (result != null)
        {
          ReadUniprotFile(result, out, align);
        }
      }
    }

    out.writeBytes("\n");
  }
  catch (Exception ex)
  {
    ex.printStackTrace();
  }
  finally
  {
    // Release the file handle even when the fetch failed part-way.
    if (out != null)
    {
      try
      {
        out.close();
      }
      catch (IOException ex)
      {
        ex.printStackTrace();
      }
    }
  }
}
/**
 * Scans the supplied record lines and applies what they describe to the
 * matching sequences of the alignment, then refreshes the panel.
 *
 * While no sequence is current, a line passing the start-of-record test is
 * used to look up a sequence through align.findName on the value returned by
 * parseElement. If no sequence is found, a message is printed, the output
 * file is truncated back to the offset recorded for this record, and lines
 * are skipped. While a sequence is current:
 * - a line passing the PDB test has its value= attribute stored with
 *   setPDBId;
 * - a line containing the text feature type has its type, description and
 *   status attributes read; the scan then advances to the next line
 *   containing position. If that line contains begin, the start comes from it
 *   and the end from the following line; otherwise both come from the same
 *   line. A SequenceFeature is added only when the range lies within
 *   getStart() and getEnd() of the sequence;
 * - a line passing the end-of-record test stores the collected features with
 *   setSequenceFeatures and clears the current sequence.
 *
 * When out is non-null, lines are also written to it, both directly here and
 * by parseValue and parseElement.
 *
 * NOTE(review): several search strings in this method are empty literals,
 * which indexOf finds in every line, and the PDB test has an unterminated
 * literal, so the method does not compile as shown. The marker text appears
 * to be missing from this copy of the file - confirm against the original
 * source before relying on the description above.
 *
 * NOTE(review): the two inner skip loops advance r without checking it
 * against result.length, and Integer.parseInt is applied to the parsed
 * positions without any guard.
 *
 * @param result lines to scan, one line per element
 * @param out    file that consumed lines are written to, or null to write
 *               nothing
 * @param align  alignment used to find sequences by name
 */
void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
{
// Sequence the current lines belong to; null while between records.
SequenceI sequence = null;
// Features collected for the current sequence.
Vector features = null;
String type, description, status, start, end, pdb = null;
for (int r = 0; r < result.length; r++)
{
// Start-of-record test. NOTE(review): the search string is empty, so as
// written this holds for every line whenever no sequence is current.
if(sequence==null && result[r].indexOf("")>-1)
{
// Offset at which this record starts in the output file, kept so that the
// record can be removed again if no sequence matches.
long filePointer = 0;
if(out!=null)
try{
filePointer=out.getFilePointer();
out.writeBytes("\n");
}catch(Exception ex){}
sequence = align.findName( parseElement( result[r], "" , out)) ;
if(sequence==null)
{
System.out.println("Couldnt find sequence id. Suggestion is "+result[r]);
// this entry has been suggested by ebi.
// doesn't match id in alignment file
// out is null when called from tryLocalCacheFirst; the exception that
// follows is absorbed by the empty catch.
try { out.setLength(filePointer); } catch (Exception ex) {}
// now skip to next entry
while( result[r].indexOf("")==-1)
r++;
}
// Reset the per-record state.
features = new Vector();
type=""; start="0"; end="0"; description=""; status=""; pdb="";
}
// Nothing below applies while no sequence is current.
if(sequence==null)
continue;
// NOTE(review): the condition on the next line has an unterminated string
// literal and does not compile as shown; restore the literal before building.
if( result[r].indexOf("-1)
{
pdb = parseValue( result[r], "value=" , out);
sequence.setPDBId(pdb);
}
if(result[r].indexOf("feature type")>-1)
{
type = parseValue( result[r], "type=" , out);
description = parseValue( result[r], "description=" , null );
status = parseValue ( result[r], "status=", null);
while( result[r].indexOf("position")==-1)
{
r++; //
}
// r++;
if(result[r].indexOf("begin")>-1)
{
start = parseValue( result[r], "position=" , out);
end = parseValue( result[++r], "position=" , out);
}
else
{
start = parseValue( result[r], "position=" , out);
end = parseValue( result[r], "position=" , null);
}
int sstart = Integer.parseInt(start);
int eend = Integer.parseInt(end);
if(out!=null)
try{ out.writeBytes("\n"); }catch(Exception ex){}
if(sstart>=sequence.getStart() && eend<=sequence.getEnd())
{
SequenceFeature sf = new SequenceFeature(type,
sstart,
eend,
description,
status);
features.add(sf);
}
}
if(result[r].indexOf("")>-1)
{
if(features!=null)
sequence.setSequenceFeatures( features );
features = null;
sequence = null;
if(out!=null)
try{ out.writeBytes("\n"); }catch(Exception ex){}
}
}
ap.RefreshPanels();
}
/**
 * Serves as many of the requested names as possible from the local cache
 * file named by the UNIPROT_CACHE property.
 *
 * The file is read line by line. For every line containing the text name,
 * the value extracted by parseElement is looked up in ids; on a match the
 * line and the lines following it, up to the end-of-record test, are
 * collected and the name is removed from ids. The collected lines are then
 * passed to {@link #ReadUniprotFile} with a null output file, so nothing is
 * written back to the cache.
 *
 * If the file ends in the middle of a record, the partial record is dropped
 * and its name stays in ids so the caller can still fetch it. The reader is
 * closed in every case. Exceptions while reading are printed; whatever was
 * collected before the failure is still processed.
 *
 * @param ids   names to resolve; names found in the cache are removed
 * @param align alignment handed on to ReadUniprotFile
 */
void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
{
  ArrayList cacheData = new ArrayList();
  BufferedReader in = null;
  try
  {
    in = new BufferedReader(
        new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
    String data;
    while ((data = in.readLine()) != null)
    {
      if (data.indexOf("name") == -1)
      {
        continue;
      }

      // NOTE(review): the tag passed to parseElement, the first collected
      // line, and the search string of the loop below are all empty
      // literals. An empty search string is found in every line, so the
      // loop never runs as written; the marker text appears to be missing -
      // confirm against the original source.
      String name = parseElement(data, "", null);
      if (!ids.contains(name))
      {
        continue;
      }

      int recordStart = cacheData.size();
      cacheData.add("");
      cacheData.add(data);

      boolean truncated = false;
      while (data.indexOf("") == -1)
      {
        data = in.readLine();
        if (data == null)
        {
          // End of file before the record was complete.
          truncated = true;
          break;
        }
        cacheData.add(data);
      }

      if (truncated)
      {
        // Drop the partial record and leave the name in ids.
        while (cacheData.size() > recordStart)
        {
          cacheData.remove(cacheData.size() - 1);
        }
        break;
      }

      // NOTE(review): this appends the line that ended the loop a second
      // time; kept so the data handed to ReadUniprotFile is unchanged.
      cacheData.add(data);
      ids.remove(name);
    }
  }
  catch (Exception ex)
  {
    ex.printStackTrace();
  }
  finally
  {
    if (in != null)
    {
      try
      {
        in.close();
      }
      catch (IOException ex)
      {
        ex.printStackTrace();
      }
    }
  }

  if (cacheData.size() > 0)
  {
    String[] localData = new String[cacheData.size()];
    cacheData.toArray(localData);
    ReadUniprotFile(localData, null, align);
  }
}
/**
 * Returns the double-quoted value that follows the first occurrence of tag
 * in line, optionally writing the line to out first.
 *
 * The character directly after the tag is assumed to be the opening quote
 * and is skipped; the value runs up to the next double quote. An empty value
 * yields the empty string.
 *
 * @param line text to search
 * @param tag  text that directly precedes the opening quote, for example
 *             type=
 * @param out  file the line plus a newline is written to before parsing, or
 *             null to write nothing; a failed write is printed and parsing
 *             continues
 * @return the value between the quotes, or the empty string when the tag is
 *         not present or no closing quote follows it
 */
String parseValue(String line, String tag, RandomAccessFile out)
{
  if (out != null)
  {
    try
    {
      out.writeBytes(line + "\n");
    }
    catch (IOException ex)
    {
      ex.printStackTrace();
    }
  }

  int tagAt = line.indexOf(tag);
  if (tagAt == -1)
  {
    return "";
  }

  // Skip the tag itself and the opening quote.
  int valueStart = tagAt + tag.length() + 1;

  // Search from valueStart, not valueStart + 1: for an empty value the
  // closing quote sits exactly at valueStart.
  int valueEnd = line.indexOf("\"", valueStart);
  if (valueEnd == -1)
  {
    // No closing quote (this also covers a tag at the very end of the line).
    return "";
  }
  return line.substring(valueStart, valueEnd);
}
/**
 * Returns the text of line that starts directly after the first occurrence
 * of tag and ends at the end marker, optionally writing the line to out
 * first.
 *
 * @param line text to search
 * @param tag  text that directly precedes the wanted content
 * @param out  file the line plus a newline is written to before parsing, or
 *             null to write nothing; a failed write is printed and parsing
 *             continues
 * @return the extracted text, or the empty string when the tag is not
 *         present or the end marker does not lie after it
 */
String parseElement(String line, String tag, RandomAccessFile out)
{
  if (out != null)
  {
    try
    {
      out.writeBytes(line + "\n");
    }
    catch (IOException ex)
    {
      ex.printStackTrace();
    }
  }

  int tagAt = line.indexOf(tag);
  if (tagAt == -1)
  {
    // Same answer as parseValue gives for a missing tag.
    return "";
  }
  int contentStart = tagAt + tag.length();

  // NOTE(review): the end-marker search string is empty, and indexOf with an
  // empty string returns 0, so the end offset is always 0 as written; the
  // marker text appears to be missing - confirm against the original source.
  int contentEnd = line.indexOf("");
  if (contentEnd < contentStart)
  {
    // End marker missing or located before the content: nothing to return.
    return "";
  }
  return line.substring(contentStart, contentEnd);
}
}