X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=520ba03826f0ddc6f5c11c07591372b94097a8e8;hb=b9926a366ae75eb28250166fba939d2f9bcf185f;hp=c8c39dc893c0c0009ef75bad39806d647e206170;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git
diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java
index c8c39dc..520ba03 100755
--- a/src/jalview/io/SequenceFeatureFetcher.java
+++ b/src/jalview/io/SequenceFeatureFetcher.java
@@ -22,398 +22,226 @@ import jalview.datamodel.*;
import jalview.gui.*;
-import jalview.io.*;
-
import java.io.*;
import java.util.*;
-import javax.swing.*;
-
-
-public class SequenceFeatureFetcher implements Runnable {
- AlignmentI align;
- AlignmentPanel ap;
- ArrayList unknownSequences;
- CutAndPasteTransfer output = new CutAndPasteTransfer();
- StringBuffer sbuffer = new StringBuffer();
-
- public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) {
- unknownSequences = new ArrayList();
- this.align = align;
- this.ap = ap;
-
- Thread thread = new Thread(this);
- thread.start();
- }
-
- public void run() {
- String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
-
- RandomAccessFile out = null;
-
- try {
- if (cache == null) {
- jalview.bin.Cache.setProperty("UNIPROT_CACHE",
- System.getProperty("user.home") + "/uniprot.xml");
- cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
- }
-
- File test = new File(cache);
-
- if (!test.exists()) {
- out = new RandomAccessFile(cache, "rw");
- out.writeBytes("\n");
- out.writeBytes("\n");
- } else {
- out = new RandomAccessFile(cache, "rw");
+import org.exolab.castor.mapping.Mapping;
- // open exisiting cache and remove from the end
- long lastLine = 0;
- String data;
+import org.exolab.castor.xml.*;
+import jalview.analysis.AlignSeq;
- while ((data = out.readLine()) != null) {
- if (data.indexOf("") > -1) {
- lastLine = out.getFilePointer();
- }
- }
- out.seek(lastLine);
- }
- int seqIndex = 0;
- Vector sequences = align.getSequences();
+/**
+ * Looks up an alignment's sequences in Uniprot via EBI and applies the returned features and start/end positions.
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class SequenceFeatureFetcher implements Runnable
+{
- while (seqIndex < sequences.size()) {
- ArrayList ids = new ArrayList();
+ AlignmentI align;
+ AlignmentPanel ap;
+ ArrayList unknownSequences;
+ CutAndPasteTransfer output = new CutAndPasteTransfer();
+ StringBuffer sbuffer = new StringBuffer();
- for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
- seqIndex++, i++) {
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);
- ids.add(sequence.getName());
- }
+ Vector localCache = new Vector();
- tryLocalCacheFirst(ids, align);
+ Vector getUniprotEntries(File file)
+ {
- if (ids.size() > 0) {
- StringBuffer remainingIds = new StringBuffer("uniprot:");
+ UniprotFile uni = new UniprotFile();
+ try
+ {
+ // 1. Load the mapping information from the file
+ Mapping map = new Mapping(uni.getClass().getClassLoader());
+ java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml");
+ map.loadMapping(url);
- for (int i = 0; i < ids.size(); i++)
- remainingIds.append(ids.get(i) + ";");
+ // 2. Unmarshal the data
+ Unmarshaller unmar = new Unmarshaller();
+ unmar.setIgnoreExtraElements(true);
+ unmar.setMapping(map);
+ uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
+ localCache.addAll( uni.getUniprotEntries() );
- EBIFetchClient ebi = new EBIFetchClient();
- String[] result = ebi.fetchData(remainingIds.toString(),
- "xml", null);
+ // 3. (disabled) marshal the Uniprot entries back out to the UNIPROT_CACHE file
+ // Marshaller marshaller = new Marshaller(
+ // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))
+ // );
+ // marshaller.setMapping(map);
+ // marshaller.marshal(uni);
- if (result != null) {
- ReadUniprotFile(result, out, align);
- }
- }
- }
-
- if (out != null) {
- out.writeBytes("\n");
- out.close();
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- }
-
- ap.repaint();
- findMissingIds(align);
+ }
+ catch (Exception e)
+ {
+ System.out.println("Error getUniprotEntries() "+e);
+ // e.printStackTrace();
+ // if(!updateLocalCache)
+ // file.delete();
- if (sbuffer.length() > 0) {
- output.setText(
- "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
- "altered, most likely the start/end residue will have been updated.\n" +
- "Save your alignment to maintain the updated id.\n\n" +
- sbuffer.toString());
- Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
+ }
+ return uni.getUniprotEntries();
+ }
+
+ /**
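+ * Creates a new SequenceFeatureFetcher and starts it running on a new thread.
+ *
+ * @param align alignment whose sequences are looked up in Uniprot
+ * @param ap alignment panel that is repainted once fetching finishes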
+ */
+ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
+ {
+ unknownSequences = new ArrayList();
+ this.align = align;
+ this.ap = ap;
+
+ Thread thread = new Thread(this);
+ thread.start();
+ }
+
+ /**
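+ * Fetches Uniprot records from EBI for up to 50 sequence names at a time, reports any updates, and passes names still unmatched to WSWUBlastClient.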
+ */
+ public void run()
+ {
+ try
+ {
+ int seqIndex = 0;
+ Vector sequences = align.getSequences();
+
+ while (seqIndex < sequences.size())
+ {
+ Vector ids = new Vector();
+
+ for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
+ seqIndex++, i++)
+ {
+ SequenceI sequence = (SequenceI) sequences.get(seqIndex);
+ ids.add(sequence.getName());
+ unknownSequences.add(sequence.getName());
}
- if (unknownSequences.size() > 0) {
- //ignore for now!!!!!!!!!!
- // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
+ ///////////////////////////////////
+ ///READ FROM EBI
+ if (ids.size() > 0)
+ {
+ StringBuffer remainingIds = new StringBuffer("uniprot:");
+ for (int i = 0; i < ids.size(); i++)
+ {
+ remainingIds.append(ids.get(i) + ";");
+ }
+ EBIFetchClient ebi = new EBIFetchClient();
+ File file = ebi.fetchDataAsFile(remainingIds.toString(),
+ "xml", null);
+
+
+ if (file != null)
+ {
+ ReadUniprotFile(file, align, ids);
+ }
}
+ }
}
-
- void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) {
- SequenceI sequence = null;
- Vector features = null;
- String type;
- String description;
- String status;
- String start;
- String end;
- String pdb = null;
-
- for (int r = 0; r < result.length; r++) {
- if ((sequence == null) && (result[r].indexOf("") > -1)) {
- long filePointer = 0;
-
- if (out != null) {
- try {
- filePointer = out.getFilePointer();
- out.writeBytes("\n");
- } catch (Exception ex) {
- }
- }
-
- String seqName = parseElement(result[r], "", out);
- sequence = align.findName(seqName);
-
- if (sequence == null) {
- sequence = align.findName(seqName.substring(0,
- seqName.indexOf('_')));
-
- if (sequence != null) {
- sbuffer.append("changing " + sequence.getName() +
- " to " + seqName + "\n");
- sequence.setName(seqName);
- }
- }
-
- if (sequence == null) {
- sbuffer.append("UNIPROT updated suggestion is " +
- result[r] + "\n");
- sequence = align.findName(result[r]);
-
- // this entry has been suggested by ebi.
- // doesn't match id in alignment file
- try {
- out.setLength(filePointer);
- } catch (Exception ex) {
- }
-
- // now skip to next entry
- while (result[r].indexOf("") == -1)
- r++;
- }
-
- features = new Vector();
- type = "";
- start = "0";
- end = "0";
- description = "";
- status = "";
- pdb = "";
- }
-
- if (sequence == null) {
- continue;
- }
-
- if (result[r].indexOf(" -1) {
- pdb = parseValue(result[r], "value=", out);
- sequence.setPDBId(pdb);
- }
-
- if (result[r].indexOf("feature type") > -1) {
- type = parseValue(result[r], "type=", out);
- description = parseValue(result[r], "description=", null);
- status = parseValue(result[r], "status=", null);
-
- while (result[r].indexOf("position") == -1) {
- r++; //
- }
-
- // r++;
- if (result[r].indexOf("begin") > -1) {
- start = parseValue(result[r], "position=", out);
- end = parseValue(result[++r], "position=", out);
- } else {
- start = parseValue(result[r], "position=", out);
- end = parseValue(result[r], "position=", null);
- }
-
- int sstart = Integer.parseInt(start);
- int eend = Integer.parseInt(end);
-
- if (out != null) {
- try {
- out.writeBytes("\n");
- } catch (Exception ex) {
- }
- }
-
- SequenceFeature sf = new SequenceFeature(type, sstart, eend,
- description, status);
- features.add(sf);
- }
-
- if (result[r].indexOf(" -1) {
- StringBuffer seqString = new StringBuffer();
-
- if (out != null) {
- try {
- out.writeBytes(result[r] + "\n");
- } catch (Exception ex) {
- }
- }
-
- while (result[++r].indexOf("") == -1) {
- seqString.append(result[r]);
-
- if (out != null) {
- try {
- out.writeBytes(result[r] + "\n");
- } catch (Exception ex) {
- }
- }
- }
-
- if (out != null) {
- try {
- out.writeBytes(result[r] + "\n");
- } catch (Exception ex) {
- }
- }
-
- StringBuffer nonGapped = new StringBuffer();
-
- for (int i = 0; i < sequence.getSequence().length(); i++) {
- if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) {
- nonGapped.append(sequence.getCharAt(i));
- }
- }
-
- int absStart = seqString.toString().indexOf(nonGapped.toString());
-
- if (absStart == -1) {
- unknownSequences.add(sequence.getName());
- features = null;
- sbuffer.append(sequence.getName() +
- " SEQUENCE NOT %100 MATCH \n");
-
- continue;
- }
-
- int absEnd = absStart + nonGapped.toString().length();
- absStart += 1;
-
- if ((absStart != sequence.getStart()) ||
- (absEnd != sequence.getEnd())) {
- sbuffer.append("Updated: " + sequence.getName() + " " +
- sequence.getStart() + "/" + sequence.getEnd() +
- " to " + absStart + "/" + absEnd + "\n");
- }
-
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
- }
-
- if (result[r].indexOf("") > -1) {
- if (features != null) {
- sequence.setSequenceFeatures(features);
- }
-
- features = null;
- sequence = null;
-
- if (out != null) {
- try {
- out.writeBytes("\n");
- } catch (Exception ex) {
- }
- }
- }
- }
+ catch (Exception ex)
+ {
+ ex.printStackTrace();
}
- void findMissingIds(AlignmentI align) {
- String data;
- ArrayList cachedIds = new ArrayList();
-
- try {
- BufferedReader in = new BufferedReader(new FileReader(
- jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
-
- while ((data = in.readLine()) != null) {
- if (data.indexOf("name") > -1) {
- String name = parseElement(data, "", null);
- cachedIds.add(name);
- }
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- }
-
- for (int i = 0; i < align.getHeight(); i++)
- if (!cachedIds.contains(align.getSequenceAt(i).getName())) {
- unknownSequences.add(align.getSequenceAt(i).getName());
- }
+ if (sbuffer.length() > 0)
+ {
+ output.setText(
+ "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
+ "altered, most likely the start/end residue will have been updated.\n" +
+ "Save your alignment to maintain the updated id.\n\n" +
+ sbuffer.toString());
+ Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
}
- void tryLocalCacheFirst(ArrayList ids, AlignmentI align) {
- ArrayList cacheData = new ArrayList();
-
- try {
- BufferedReader in = new BufferedReader(new FileReader(
- jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
-
- // read through cache file, if the cache has sequences we're looking for
- // add the lines to a new String array, Readthis new array and
- // make sure we remove the ids from the list to retrieve from EBI
- String data;
-
- while ((data = in.readLine()) != null) {
- if (data.indexOf("name") > -1) {
- String name = parseElement(data, "", null);
-
- if (ids.contains(name)) {
- cacheData.add("");
- cacheData.add(data);
-
- while (data.indexOf("") == -1) {
- data = in.readLine();
- cacheData.add(data);
- }
-
- cacheData.add(data);
-
- ids.remove(name);
- }
- }
- }
- } catch (Exception ex) {
- ex.printStackTrace();
- }
-
- String[] localData = new String[cacheData.size()];
- cacheData.toArray(localData);
-
- if ((localData != null) && (localData.length > 0)) {
- ReadUniprotFile(localData, null, align);
- }
+ if (unknownSequences.size() > 0)
+ {
+ new WSWUBlastClient(ap, align, unknownSequences);
}
-
- String parseValue(String line, String tag, RandomAccessFile out) {
- if (out != null) {
- try {
- out.writeBytes(line + "\n");
- } catch (Exception ex) {
- }
- }
-
- int index = line.indexOf(tag) + tag.length() + 1;
-
- if (index == tag.length()) {
- return "";
- }
-
- return line.substring(index, line.indexOf("\"", index + 1));
+ else
+ ((Alignment)align).featuresAdded = true;
+
+
+ ap.repaint();
+ }
+
+ /**
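+ * Matches the Uniprot entries read from a file to alignment sequences and updates those sequences.
+ *
+ * @param file Uniprot XML file to read; nothing is done if it does not exist
+ * @param align alignment searched by entry accession, then by entry name
+ * @param ids sequence names awaiting lookup; names of matched sequences are removed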
+ */
+ void ReadUniprotFile(File file, AlignmentI align, Vector ids)
+ {
+ if(!file.exists())
+ return;
+
+ SequenceI sequence = null;
+ // String pdb = null;
+
+ Vector entries = getUniprotEntries(file);
+
+ int i, iSize = entries==null?0:entries.size();
+ UniprotEntry entry;
+ for (i = 0; i < iSize; i++)
+ {
+ entry = (UniprotEntry) entries.elementAt(i);
+ String idmatch = entry.getAccession();
+ sequence = align.findName(idmatch);
+
+ if (sequence == null)
+ {
+ // The sequence may be identified by entry name rather than accession
+ idmatch = entry.getName();
+ sequence = align.findName(idmatch);
+ }
+
+ if (sequence == null)
+ {
+ continue;
+ }
+
+ ids.remove(sequence.getName());
+ unknownSequences.remove(sequence.getName());
+
+ String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());
+
+ int absStart = entry.getUniprotSequence().getContent().indexOf(
+ nonGapped.toString());
+
+ if (absStart == -1)
+ {
+ unknownSequences.add(sequence.getName());
+ sbuffer.append(sequence.getName() +
+ " SEQUENCE NOT %100 MATCH \n");
+
+ continue;
+ }
+
+ int absEnd = absStart + nonGapped.toString().length();
+ absStart += 1;
+
+ if ( (absStart != sequence.getStart()) ||
+ (absEnd != sequence.getEnd()))
+ {
+ sbuffer.append("Updated: " + sequence.getName() + " " +
+ sequence.getStart() + "/" + sequence.getEnd() +
+ " to " + absStart + "/" + absEnd + "\n");
+ }
+
+ sequence.setSequenceFeatures(entry.getFeatures());
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
}
+ }
+}
- String parseElement(String line, String tag, RandomAccessFile out) {
- if (out != null) {
- try {
- out.writeBytes(line + "\n");
- } catch (Exception ex) {
- }
- }
-
- int index = line.indexOf(tag) + tag.length();
- return line.substring(index, line.indexOf(""));
- }
-}