/*
* Jalview - A Sequence Alignment Editor and Viewer
* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package jalview.io;
import jalview.datamodel.*;
import jalview.gui.*;
import jalview.io.*;
import java.io.*;
import java.util.*;
/**
 * Looks up Uniprot records for the sequences of an alignment and applies
 * what it finds (names, start/end numbering, PDB id, features) to those
 * sequences. Records are taken from a local cache file where possible and
 * are otherwise requested through EBIFetchClient. Constructing an instance
 * immediately starts the work on a new thread.
 *
 * NOTE(review): several string literals in this class are empty, and two
 * conditions in ReadUniprotFile break off in the middle of a literal. That
 * pattern suggests markup-like text was lost from this copy of the file -
 * confirm against the repository version before relying on the behaviour
 * described in these comments.
 */
public class SequenceFeatureFetcher implements Runnable {
// Alignment whose sequences are looked up and updated.
AlignmentI align;
// Panel supplied by the creator; stored here but not read anywhere in this class.
AlignmentPanel ap;
// Names of sequences that could not be matched; filled by ReadUniprotFile and findMissingIds.
ArrayList unknownSequences;
// Window used to display the report collected in sbuffer.
CutAndPasteTransfer output = new CutAndPasteTransfer();
// Text log of renamed or re-numbered sequences, shown to the user at the end of run().
StringBuffer sbuffer = new StringBuffer();
/**
 * Stores the arguments and starts a new thread that executes run().
 *
 * @param align alignment whose sequences should be annotated
 * @param ap panel associated with the alignment (only stored)
 */
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) {
unknownSequences = new ArrayList();
this.align = align;
this.ap = ap;
// The thread is started from inside the constructor, so run() may begin
// before the caller receives the new object.
Thread thread = new Thread(this);
thread.start();
}
/**
 * Thread body. Opens (or creates) the cache file, resolves every sequence
 * name in batches of at most 50 - first from the cache, then through
 * EBIFetchClient - then calls PaintRefresher, collects unmatched names and
 * shows the change report if there is one. Any exception raised while
 * fetching is printed and the remaining steps still run.
 */
public void run() {
// The cache location comes from the UNIPROT_CACHE property.
String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
RandomAccessFile out = null;
try {
if (cache == null) {
// Nothing configured yet: default to uniprot.xml in the user's home directory.
jalview.bin.Cache.setProperty("UNIPROT_CACHE",
System.getProperty("user.home") + "/uniprot.xml");
cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
}
File test = new File(cache);
if (!test.exists()) {
// New cache file: two lines are written first.
// NOTE(review): as shown both lines are bare newlines; presumably header text is missing here - confirm.
out = new RandomAccessFile(cache, "rw");
out.writeBytes("\n");
out.writeBytes("\n");
} else {
out = new RandomAccessFile(cache, "rw");
// open existing cache and remove from the end
// The loop remembers the file position after the last matching line so
// that new records are written from there, overwriting what followed.
// NOTE(review): with an empty search literal every line matches, so
// lastLine ends up at end of file - the literal looks incomplete.
long lastLine = 0;
String data;
while ((data = out.readLine()) != null) {
if (data.indexOf("") > -1) {
lastLine = out.getFilePointer();
}
}
out.seek(lastLine);
}
int seqIndex = 0;
Vector sequences = align.getSequences();
// Outer loop: one pass per batch; the inner for loop advances seqIndex.
while (seqIndex < sequences.size()) {
ArrayList ids = new ArrayList();
// Collect the names of up to 50 sequences for this batch.
for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
seqIndex++, i++) {
SequenceI sequence = (SequenceI) sequences.get(seqIndex);
ids.add(sequence.getName());
}
// Names found in the cache are applied and removed from ids by this call.
tryLocalCacheFirst(ids, align);
if (ids.size() > 0) {
// Build the query: the prefix followed by each remaining id and a semicolon.
StringBuffer remainingIds = new StringBuffer("uniprot:");
for (int i = 0; i < ids.size(); i++)
remainingIds.append(ids.get(i) + ";");
EBIFetchClient ebi = new EBIFetchClient();
String[] result = ebi.fetchData(remainingIds.toString(),
"xml", null);
if (result != null) {
// Passing out makes ReadUniprotFile copy the parsed lines into the cache file.
ReadUniprotFile(result, out, align);
}
}
}
// Write the closing line and close the cache file.
// NOTE(review): this is the only close; if an exception is thrown above,
// control jumps to the catch block and out is left open.
if (out != null) {
out.writeBytes("\n");
out.close();
}
} catch (Exception ex) {
ex.printStackTrace();
}
jalview.gui.PaintRefresher.Refresh(null, align);
findMissingIds(align);
// Only show the report window when something was renamed or re-numbered.
if (sbuffer.length() > 0) {
output.setText(
"Your sequences have been matched to Uniprot. Some of the ids have been\n" +
"altered, most likely the start/end residue will have been updated.\n" +
"Save your alignment to maintain the updated id.\n\n" +
sbuffer.toString());
Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
}
// Unmatched sequences are collected but nothing is done with them yet.
if (unknownSequences.size() > 0) {
//ignore for now!!!!!!!!!!
// WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
}
}
/**
 * Walks the lines of a Uniprot record listing and applies each record to
 * the alignment sequence with the matching name: PDB id, features and the
 * start/end positions derived from locating the ungapped alignment sequence
 * inside the record's sequence text. Renames and re-numberings are logged
 * to sbuffer; sequences whose residues cannot be located are added to
 * unknownSequences.
 *
 * NOTE(review): the loops that advance r while searching for a marker line
 * have no bounds check, so input that lacks the marker would run past the
 * end of the array.
 *
 * @param result lines to parse
 * @param out cache file that parsed lines are echoed to, or null to parse without writing
 * @param align alignment whose sequences are looked up by name and updated
 */
void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) {
// sequence is the alignment sequence for the record being parsed; null between records.
SequenceI sequence = null;
Vector features = null;
String type;
String description;
String status;
String start;
String end;
String pdb = null;
for (int r = 0; r < result.length; r++) {
// Start of a record: no sequence is active and the line holds the name marker.
// NOTE(review): the marker literal is empty as shown - presumably lost text.
if ((sequence == null) && (result[r].indexOf("") > -1)) {
// Remember where this record starts in the cache so it can be rolled back.
long filePointer = 0;
if (out != null) {
try {
filePointer = out.getFilePointer();
out.writeBytes("\n");
} catch (Exception ex) {
}
}
String seqName = parseElement(result[r], "", out);
sequence = align.findName(seqName);
if (sequence == null) {
// No exact match: retry with the part of the name before the first underscore.
// NOTE(review): if seqName has no underscore, indexOf returns -1 and substring throws.
sequence = align.findName(seqName.substring(0,
seqName.indexOf('_')));
if (sequence != null) {
sbuffer.append("changing " + sequence.getName() +
" to " + seqName + "\n");
sequence.setName(seqName);
}
}
if (sequence == null) {
sbuffer.append("UNIPROT updated suggestion is " +
result[r] + "\n");
sequence = align.findName(result[r]);
// this entry has been suggested by ebi.
// doesn't match id in alignment file
// Truncate the cache back to where this record began.
// When out is null the resulting exception is swallowed by the empty catch.
try {
out.setLength(filePointer);
} catch (Exception ex) {
}
// now skip to next entry
while (result[r].indexOf("") == -1)
r++;
}
// Reset the per-record parsing state.
features = new Vector();
type = "";
start = "0";
end = "0";
description = "";
status = "";
pdb = "";
}
// Lines outside a matched record are ignored.
if (sequence == null) {
continue;
}
// PDB reference line: read its value attribute and store it on the sequence.
// NOTE(review): the condition below is cut off inside its string literal in this copy.
if (result[r].indexOf(" -1) {
pdb = parseValue(result[r], "value=", out);
sequence.setPDBId(pdb);
}
if (result[r].indexOf("feature type") > -1) {
// Only the first parseValue call echoes the line to out, so it is cached once.
type = parseValue(result[r], "type=", out);
description = parseValue(result[r], "description=", null);
status = parseValue(result[r], "status=", null);
// Advance to the line that carries the position attribute.
while (result[r].indexOf("position") == -1) {
r++; //
}
// r++;
if (result[r].indexOf("begin") > -1) {
// Range feature: start on this line, end on the following line.
start = parseValue(result[r], "position=", out);
end = parseValue(result[++r], "position=", out);
} else {
// Single position: start and end are read from the same line.
start = parseValue(result[r], "position=", out);
end = parseValue(result[r], "position=", null);
}
int sstart = Integer.parseInt(start);
int eend = Integer.parseInt(end);
if (out != null) {
try {
out.writeBytes("\n");
} catch (Exception ex) {
}
}
SequenceFeature sf = new SequenceFeature(type, sstart, eend,
description, status);
features.add(sf);
}
// Sequence text block: used to work out where the alignment sequence sits in the record.
// NOTE(review): the condition below is cut off inside its string literal in this copy.
if (result[r].indexOf(" -1) {
StringBuffer seqString = new StringBuffer();
if (out != null) {
try {
out.writeBytes(result[r] + "\n");
} catch (Exception ex) {
}
}
// Concatenate the following lines until the terminating marker line,
// echoing each one to the cache.
while (result[++r].indexOf("") == -1) {
seqString.append(result[r]);
if (out != null) {
try {
out.writeBytes(result[r] + "\n");
} catch (Exception ex) {
}
}
}
// Echo the terminating line as well.
if (out != null) {
try {
out.writeBytes(result[r] + "\n");
} catch (Exception ex) {
}
}
// Strip gap characters from the alignment sequence before searching for it.
StringBuffer nonGapped = new StringBuffer();
for (int i = 0; i < sequence.getSequence().length(); i++) {
if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) {
nonGapped.append(sequence.getCharAt(i));
}
}
int absStart = seqString.toString().indexOf(nonGapped.toString());
if (absStart == -1) {
// Residues not found in the record: flag the sequence and drop its features.
unknownSequences.add(sequence.getName());
features = null;
sbuffer.append(sequence.getName() +
" SEQUENCE NOT %100 MATCH \n");
continue;
}
// absEnd is computed from the 0-based start, then absStart is shifted to
// 1-based, which makes the pair a 1-based inclusive range.
int absEnd = absStart + nonGapped.toString().length();
absStart += 1;
if ((absStart != sequence.getStart()) ||
(absEnd != sequence.getEnd())) {
sbuffer.append("Updated: " + sequence.getName() + " " +
sequence.getStart() + "/" + sequence.getEnd() +
" to " + absStart + "/" + absEnd + "\n");
}
sequence.setStart(absStart);
sequence.setEnd(absEnd);
}
// End of record: attach the collected features and clear the per-record state.
if (result[r].indexOf("") > -1) {
if (features != null) {
sequence.setSequenceFeatures(features);
}
features = null;
sequence = null;
if (out != null) {
try {
out.writeBytes("\n");
} catch (Exception ex) {
}
}
}
}
}
/**
 * Adds to unknownSequences the name of every alignment sequence that does
 * not appear among the names read from the cache file.
 *
 * NOTE(review): the reader opened here is never closed.
 *
 * @param align alignment whose sequence names are checked against the cache
 */
void findMissingIds(AlignmentI align) {
String data;
ArrayList cachedIds = new ArrayList();
try {
BufferedReader in = new BufferedReader(new FileReader(
jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
// Every cache line containing the word name is treated as a name line.
while ((data = in.readLine()) != null) {
if (data.indexOf("name") > -1) {
String name = parseElement(data, "", null);
cachedIds.add(name);
}
}
} catch (Exception ex) {
ex.printStackTrace();
}
for (int i = 0; i < align.getHeight(); i++)
if (!cachedIds.contains(align.getSequenceAt(i).getName())) {
unknownSequences.add(align.getSequenceAt(i).getName());
}
}
/**
 * Applies any cached records for the given ids to the alignment and removes
 * those ids from the list, leaving only the ids that still have to be
 * fetched remotely.
 *
 * NOTE(review): the reader opened here is never closed.
 *
 * @param ids sequence names to look for; modified in place
 * @param align alignment passed on to ReadUniprotFile
 */
void tryLocalCacheFirst(ArrayList ids, AlignmentI align) {
ArrayList cacheData = new ArrayList();
try {
BufferedReader in = new BufferedReader(new FileReader(
jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
// read through cache file, if the cache has sequences we're looking for
// add the lines to a new String array, Read this new array and
// make sure we remove the ids from the list to retrieve from EBI
String data;
while ((data = in.readLine()) != null) {
if (data.indexOf("name") > -1) {
String name = parseElement(data, "", null);
if (ids.contains(name)) {
// Rebuild the record: a leading marker line, the name line, then
// every following line up to the terminating marker.
cacheData.add("");
cacheData.add(data);
// NOTE(review): readLine may return null at end of file; the resulting
// exception would be caught by the outer catch.
while (data.indexOf("") == -1) {
data = in.readLine();
cacheData.add(data);
}
// The last line read is appended a second time here.
cacheData.add(data);
ids.remove(name);
}
}
}
} catch (Exception ex) {
ex.printStackTrace();
}
String[] localData = new String[cacheData.size()];
cacheData.toArray(localData);
// A null out tells ReadUniprotFile not to write these lines back to the cache.
if ((localData != null) && (localData.length > 0)) {
ReadUniprotFile(localData, null, align);
}
}
/**
 * Extracts the quoted value that follows tag in line.
 *
 * @param line text to search; also written to out, followed by a newline, when out is not null
 * @param tag attribute prefix to look for, for example position=
 * @param out file to copy the line to, or null for no copy; write failures are ignored
 * @return the text from one character past the tag up to the next double quote, or an empty string when the tag is absent
 */
String parseValue(String line, String tag, RandomAccessFile out) {
if (out != null) {
try {
out.writeBytes(line + "\n");
} catch (Exception ex) {
}
}
// Skip the tag plus one more character (presumably the opening quote).
int index = line.indexOf(tag) + tag.length() + 1;
// index equals tag.length() only when indexOf returned -1, i.e. tag not found.
if (index == tag.length()) {
return "";
}
// NOTE(review): the closing quote is searched from index + 1, so an empty
// value (closing quote exactly at index) is not recognised as empty.
return line.substring(index, line.indexOf("\"", index + 1));
}
/**
 * Extracts the text that follows tag in line, up to the closing marker.
 *
 * NOTE(review): the closing-marker literal is empty as shown, so indexOf
 * returns 0; presumably the real literal was lost from this copy - confirm.
 *
 * @param line text to search; also written to out, followed by a newline, when out is not null
 * @param tag opening marker that precedes the wanted text
 * @param out file to copy the line to, or null for no copy; write failures are ignored
 * @return the substring between the end of tag and the closing marker
 */
String parseElement(String line, String tag, RandomAccessFile out) {
if (out != null) {
try {
out.writeBytes(line + "\n");
} catch (Exception ex) {
}
}
int index = line.indexOf(tag) + tag.length();
return line.substring(index, line.indexOf(""));
}
}