/*
* Jalview - A Sequence Alignment Editor and Viewer
* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package jalview.io;
import jalview.datamodel.*;
import jalview.gui.*;
import jalview.io.*;
import java.io.*;
import java.util.*;
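/**
* Looks up the sequences of an alignment in UniProt on a background thread.
* A local cache file is consulted first; ids not found there are requested
* from the EBI fetch service. Matching sequences have their name, start/end
* positions, PDB id and sequence features updated.
*
* @author $author$
* @version $Revision$
*/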
public class SequenceFeatureFetcher implements Runnable
{
// Alignment whose sequences are looked up and updated.
AlignmentI align;
// Panel passed to the constructor; only stored by the code in this class.
AlignmentPanel ap;
// Names of sequences that could not be matched (sequence mismatch or absent from the cache).
ArrayList unknownSequences;
// Text window used to display the report collected in sbuffer.
CutAndPasteTransfer output = new CutAndPasteTransfer();
// Collects the messages about renamed, repositioned or unmatched sequences.
StringBuffer sbuffer = new StringBuffer();
/**
* Builds a fetcher for the given alignment and immediately starts the
* retrieval ({@link #run()}) on a newly created thread.
*
* @param align alignment whose sequences are to be matched and annotated
* @param ap alignment panel associated with the alignment
*/
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
  this.align = align;
  this.ap = ap;
  this.unknownSequences = new ArrayList();

  // From here on run() executes concurrently with the caller.
  new Thread(this).start();
}
/**
* Thread body. Opens (or creates) the cache file in the user's home
* directory, then walks the alignment's sequences in batches of 50 names:
* each batch is first resolved against the local cache and whatever is left
* is requested from the EBI fetch service and parsed. Afterwards the ids
* missing from the cache are recorded, any collected messages are shown in
* a new internal frame, and the alignment is repainted.
*
* Failures while fetching are reported on stderr and do not prevent the
* final report/repaint. The cache file is closed on every path.
*/
public void run()
{
  final int batchSize = 50;
  RandomAccessFile out = null;

  try
  {
    String cache = System.getProperty("user.home") +
        "/.jalview.uniprot.xml";
    // Must be tested before opening: "rw" mode creates the file.
    boolean cacheExisted = new File(cache).exists();
    out = new RandomAccessFile(cache, "rw");

    if (!cacheExisted)
    {
      // NOTE(review): both header writes emit only a newline; the literals
      // look truncated (an XML header would be expected) - confirm.
      out.writeBytes("\n");
      out.writeBytes("\n");
    }
    else
    {
      // Position the file after the last matching line so that new
      // entries overwrite whatever follows it.
      // NOTE(review): the search literal is empty, so every line matches
      // and the file is positioned at its end - literal looks truncated.
      long lastLine = 0;
      String data;
      while ((data = out.readLine()) != null)
      {
        if (data.indexOf("") > -1)
        {
          lastLine = out.getFilePointer();
        }
      }
      out.seek(lastLine);
    }

    Vector sequences = align.getSequences();
    int seqIndex = 0;
    while (seqIndex < sequences.size())
    {
      // Collect the names of the next batch of sequences.
      ArrayList ids = new ArrayList();
      for (int i = 0; (seqIndex < sequences.size()) && (i < batchSize);
           seqIndex++, i++)
      {
        SequenceI sequence = (SequenceI) sequences.get(seqIndex);
        ids.add(sequence.getName());
      }

      // Removes from ids every name that the local cache could satisfy.
      tryLocalCacheFirst(ids, align);

      if (ids.size() > 0)
      {
        StringBuffer remainingIds = new StringBuffer("uniprot:");
        for (int i = 0; i < ids.size(); i++)
        {
          remainingIds.append(ids.get(i)).append(";");
        }

        EBIFetchClient ebi = new EBIFetchClient();
        String[] result = ebi.fetchData(remainingIds.toString(),
                                        "xml", null);
        if (result != null)
        {
          ReadUniprotFile(result, out, align);
        }
      }
    }

    // NOTE(review): trailer write is a bare newline; literal looks truncated.
    out.writeBytes("\n");
  }
  catch (Exception ex)
  {
    ex.printStackTrace();
  }
  finally
  {
    // Previously the file stayed open whenever an exception was thrown above.
    if (out != null)
    {
      try
      {
        out.close();
      }
      catch (IOException ex)
      {
        ex.printStackTrace();
      }
    }
  }

  findMissingIds(align);

  if (sbuffer.length() > 0)
  {
    output.setText(
        "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
        "altered, most likely the start/end residue will have been updated.\n" +
        "Save your alignment to maintain the updated id.\n\n" +
        sbuffer.toString());
    Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
  }

  // unknownSequences is deliberately not acted upon yet (a BLAST lookup via
  // WSWUBlastClient(align, unknownSequences) was planned here).

  jalview.gui.PaintRefresher.Refresh(null, align);
}
/**
* Walks the given lines of fetched UniProt XML and applies what it finds to
* the matching sequence of the alignment: the name may be changed, a PDB id
* set, start/end recalculated and a list of sequence features attached.
* Lines that are consumed are echoed to the cache file when one is supplied.
* Messages for the user are appended to sbuffer; sequences whose residues
* cannot be located in the fetched sequence are added to unknownSequences.
*
* NOTE(review): several search/output string literals in this method are
* empty and two lines (marked below) are syntactically damaged, apparently
* because the tag text was lost. The method does not compile as it stands.
*
* @param result lines to parse, one array element per line
* @param out cache file to echo parsed lines to, or null to write nothing
* @param align alignment whose sequences are looked up by name and updated
*/
void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align)
{
// Sequence of the entry currently being parsed; null between entries.
SequenceI sequence = null;
// Features collected for the current entry; null once discarded or attached.
Vector features = null;
String type;
String description;
String status;
String start;
String end;
String pdb = null;
for (int r = 0; r < result.length; r++)
{
// Start of a new entry: only considered while no entry is in progress.
// NOTE(review): the search literal is empty, so indexOf is 0 for every line.
if ((sequence == null) && (result[r].indexOf("") > -1))
{
// Remember where this entry begins in the cache so it can be rolled back.
long filePointer = 0;
if (out != null)
{
try
{
filePointer = out.getFilePointer();
out.writeBytes("\n");
}
catch (Exception ex)
{
}
}
String seqName = parseElement(result[r], "", out);
sequence = align.findName(seqName);
if (sequence == null)
{
// Retry with the part of the name before the first '_'; on success the
// alignment sequence is renamed to the full fetched name.
// substring throws if seqName contains no '_' (indexOf returns -1).
sequence = align.findName(seqName.substring(0,
seqName.indexOf('_')));
if (sequence != null)
{
sbuffer.append("changing " + sequence.getName() +
" to " + seqName + "\n");
sequence.setName(seqName);
}
}
if (sequence == null)
{
sbuffer.append("UNIPROT updated suggestion is " +
result[r] + "\n");
sequence = align.findName(result[r]);
// this entry has been suggested by ebi.
// doesn't match id in alignment file
// Roll the cache file back to where this entry started. When out is
// null this throws and the exception is swallowed by the catch.
try
{
out.setLength(filePointer);
}
catch (Exception ex)
{
}
// now skip to next entry
// There is no bounds check on r in this loop.
while (result[r].indexOf("") == -1)
r++;
}
// Reset the per-entry state.
features = new Vector();
type = "";
start = "0";
end = "0";
description = "";
status = "";
pdb = "";
}
// Nothing to update until an entry has been matched to a sequence.
if (sequence == null)
{
continue;
}
// NOTE(review): the next line is damaged - the string literal is
// unterminated and the "> -1" comparison is incomplete. The body reads
// a value= attribute and stores it as the sequence's PDB id.
if (result[r].indexOf(" -1)
{
pdb = parseValue(result[r], "value=", out);
sequence.setPDBId(pdb);
}
if (result[r].indexOf("feature type") > -1)
{
// Only the first parseValue call echoes the line to the cache; the
// others pass null so the same line is not written again.
type = parseValue(result[r], "type=", out);
description = parseValue(result[r], "description=", null);
status = parseValue(result[r], "status=", null);
// Advance to the line carrying the position attribute (no bounds check).
while (result[r].indexOf("position") == -1)
{
r++; //
}
// r++;
if (result[r].indexOf("begin") > -1)
{
// Range: start is on this line, end on the following line.
start = parseValue(result[r], "position=", out);
end = parseValue(result[++r], "position=", out);
}
else
{
// Single position: start and end are read from the same line.
start = parseValue(result[r], "position=", out);
end = parseValue(result[r], "position=", null);
}
// parseInt throws NumberFormatException if either value is not numeric.
int sstart = Integer.parseInt(start);
int eend = Integer.parseInt(end);
if (out != null)
{
try
{
out.writeBytes("\n");
}
catch (Exception ex)
{
}
}
SequenceFeature sf = new SequenceFeature(type, sstart, eend,
description, status);
features.add(sf);
}
// NOTE(review): the next line is damaged in the same way as the PDB test
// above. The body concatenates the following lines into seqString.
if (result[r].indexOf(" -1)
{
StringBuffer seqString = new StringBuffer();
if (out != null)
{
try
{
out.writeBytes(result[r] + "\n");
}
catch (Exception ex)
{
}
}
// Append lines until the terminating line is reached (no bounds check).
// NOTE(review): empty search literal - the loop body never runs as written.
while (result[++r].indexOf("") == -1)
{
seqString.append(result[r]);
if (out != null)
{
try
{
out.writeBytes(result[r] + "\n");
}
catch (Exception ex)
{
}
}
}
if (out != null)
{
try
{
out.writeBytes(result[r] + "\n");
}
catch (Exception ex)
{
}
}
// Build the alignment sequence with gap characters removed.
StringBuffer nonGapped = new StringBuffer();
for (int i = 0; i < sequence.getSequence().length(); i++)
{
if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
{
nonGapped.append(sequence.getCharAt(i));
}
}
// Locate the ungapped alignment sequence inside the collected sequence.
int absStart = seqString.toString().indexOf(nonGapped.toString());
if (absStart == -1)
{
// Not a substring: record as unknown and drop this entry's features.
unknownSequences.add(sequence.getName());
features = null;
sbuffer.append(sequence.getName() +
" SEQUENCE NOT %100 MATCH \n");
continue;
}
// absEnd is computed from the 0-based index, then absStart becomes 1-based.
int absEnd = absStart + nonGapped.toString().length();
absStart += 1;
if ((absStart != sequence.getStart()) ||
(absEnd != sequence.getEnd()))
{
sbuffer.append("Updated: " + sequence.getName() + " " +
sequence.getStart() + "/" + sequence.getEnd() +
" to " + absStart + "/" + absEnd + "\n");
}
sequence.setStart(absStart);
sequence.setEnd(absEnd);
}
// End of entry: attach any collected features and reset for the next entry.
// NOTE(review): empty search literal - true for every line as written.
if (result[r].indexOf("") > -1)
{
if (features != null)
{
sequence.setSequenceFeatures(features);
}
features = null;
sequence = null;
if (out != null)
{
try
{
out.writeBytes("\n");
}
catch (Exception ex)
{
}
}
}
}
}
/**
* Adds to unknownSequences the name of every sequence in the alignment that
* is not among the names read from the file given by the UNIPROT_CACHE
* property. Returns without doing anything when that property is not set.
* If reading the file fails the error is printed and the comparison is made
* against whatever names were read up to that point.
*
* @param align alignment whose sequence names are checked against the cache
*/
void findMissingIds(AlignmentI align)
{
  // A set gives constant-time membership tests in the loop below.
  Set cachedIds = new HashSet();
  BufferedReader in = null;

  try
  {
    String cachePath = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
    if (cachePath == null)
    {
      return;
    }

    in = new BufferedReader(new FileReader(cachePath));
    String data;
    while ((data = in.readLine()) != null)
    {
      if (data.indexOf("name") > -1)
      {
        // NOTE(review): the tag argument is an empty literal; it looks
        // truncated - confirm the intended element tag.
        cachedIds.add(parseElement(data, "", null));
      }
    }
  }
  catch (Exception ex)
  {
    ex.printStackTrace();
  }
  finally
  {
    // The reader was previously never closed.
    if (in != null)
    {
      try
      {
        in.close();
      }
      catch (IOException ex)
      {
        ex.printStackTrace();
      }
    }
  }

  for (int i = 0; i < align.getHeight(); i++)
  {
    String name = align.getSequenceAt(i).getName();
    if (!cachedIds.contains(name))
    {
      unknownSequences.add(name);
    }
  }
}
/**
* Tries to satisfy the requested ids from the file named by the
* UNIPROT_CACHE property. For every cached entry whose name is in ids, the
* entry's lines are collected and the name is removed from ids, so that on
* return ids holds only the names that still have to be fetched remotely.
* The collected lines are then parsed with ReadUniprotFile (no cache
* write-back). Returns without doing anything when the property is not set.
*
* @param ids names to look for; names found in the cache are removed
* @param align alignment to which the cached data is applied
*/
void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
{
  ArrayList cacheData = new ArrayList();
  BufferedReader in = null;

  try
  {
    String cachePath = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
    if (cachePath == null)
    {
      return;
    }

    in = new BufferedReader(new FileReader(cachePath));
    String data;
    while ((data = in.readLine()) != null)
    {
      if (data.indexOf("name") == -1)
      {
        continue;
      }

      // NOTE(review): the tag argument and the literals below are empty;
      // they look truncated - confirm the intended tag strings.
      String name = parseElement(data, "", null);
      if (!ids.contains(name))
      {
        continue;
      }

      int entryStart = cacheData.size();
      cacheData.add("");
      cacheData.add(data);

      // Copy lines up to and including the one that terminates the entry.
      while ((data != null) && (data.indexOf("") == -1))
      {
        data = in.readLine();
        if (data != null)
        {
          cacheData.add(data);
        }
      }

      if (data == null)
      {
        // The file ended inside the entry. Previously a null line was
        // stored and later broke parsing; drop the partial entry and leave
        // the id in the list so it is fetched remotely instead.
        cacheData.subList(entryStart, cacheData.size()).clear();
        break;
      }

      // The terminating line is added once more, as the original code did.
      cacheData.add(data);
      ids.remove(name);
    }
  }
  catch (Exception ex)
  {
    ex.printStackTrace();
  }
  finally
  {
    // The reader was previously never closed.
    if (in != null)
    {
      try
      {
        in.close();
      }
      catch (IOException ex)
      {
        ex.printStackTrace();
      }
    }
  }

  if (!cacheData.isEmpty())
  {
    String[] localData = new String[cacheData.size()];
    cacheData.toArray(localData);
    ReadUniprotFile(localData, null, align);
  }
}
/**
* Extracts the double-quoted value that follows the given attribute tag in
* a line, e.g. for tag {@code type=} and line {@code <x type="abc">} the
* result is {@code abc}. When a cache file is supplied the whole line is
* first appended to it.
*
* @param line line to search
* @param tag attribute name including the trailing '=', e.g. "type="
* @param out cache file the line is echoed to, or null to write nothing
*
* @return the text between the quotes; the empty string if the tag is not
*         present, the value is empty, or the closing quote is missing
*/
String parseValue(String line, String tag, RandomAccessFile out)
{
  if (out != null)
  {
    try
    {
      out.writeBytes(line + "\n");
    }
    catch (IOException ignored)
    {
      // Writing the cache is best effort; parsing continues regardless.
    }
  }

  int tagPos = line.indexOf(tag);
  if (tagPos == -1)
  {
    return "";
  }

  // Skip the tag itself and the opening quote that follows it.
  int valueStart = tagPos + tag.length() + 1;

  // Search from valueStart, not valueStart + 1: for an empty value the
  // closing quote sits exactly at valueStart and must not be skipped.
  int valueEnd = line.indexOf("\"", valueStart);
  if (valueEnd == -1)
  {
    return "";
  }

  return line.substring(valueStart, valueEnd);
}
/**
* Extracts the text that follows the given tag in a line, up to the next
* '<' character, e.g. for tag {@code <a>} and line {@code <a>xyz</a>} the
* result is {@code xyz}. When a cache file is supplied the whole line is
* first appended to it.
*
* NOTE(review): the original end delimiter was an empty string literal
* (apparently lost), which made the end index always 0. The text is now
* taken to end at the next '<' - confirm against the intended delimiter.
*
* @param line line to search
* @param tag opening tag that precedes the wanted text
* @param out cache file the line is echoed to, or null to write nothing
*
* @return the text between the tag and the next '<'; the empty string if
*         the tag is not present or no '<' follows it
*/
String parseElement(String line, String tag, RandomAccessFile out)
{
  if (out != null)
  {
    try
    {
      out.writeBytes(line + "\n");
    }
    catch (IOException ignored)
    {
      // Writing the cache is best effort; parsing continues regardless.
    }
  }

  int tagPos = line.indexOf(tag);
  if (tagPos == -1)
  {
    // Previously an absent tag produced a meaningless start index.
    return "";
  }

  int textStart = tagPos + tag.length();
  int textEnd = line.indexOf('<', textStart);
  if (textEnd == -1)
  {
    return "";
  }

  return line.substring(textStart, textEnd);
}
}