X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FSequenceFeatureFetcher.java;h=520ba03826f0ddc6f5c11c07591372b94097a8e8;hb=b9926a366ae75eb28250166fba939d2f9bcf185f;hp=7770cedc82e818b5672f75321fac6f5c0b1530eb;hpb=9270f5adbff6622400c8fc933ebdb652cbb760f5;p=jalview.git
diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java
index 7770ced..520ba03 100755
--- a/src/jalview/io/SequenceFeatureFetcher.java
+++ b/src/jalview/io/SequenceFeatureFetcher.java
@@ -1,371 +1,247 @@
+/*
+* Jalview - A Sequence Alignment Editor and Viewer
+* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+*/
package jalview.io;
+import jalview.datamodel.*;
+
+import jalview.gui.*;
+
import java.io.*;
+
import java.util.*;
-import javax.swing.*;
-import jalview.io.*;
-import jalview.gui.*;
-import jalview.datamodel.*;
+import org.exolab.castor.mapping.Mapping;
+
+import org.exolab.castor.xml.*;
+import jalview.analysis.AlignSeq;
+
+
+
+/**
+ * Fetches UniProt entries for the sequences of an alignment on its own thread.
+ *
+ * @author $author$
+ * @version $Revision$
+ */
public class SequenceFeatureFetcher implements Runnable
{
+
AlignmentI align;
AlignmentPanel ap;
ArrayList unknownSequences;
- JInternalFrame outputFrame = new JInternalFrame();
- CutAndPasteTransfer output = new CutAndPasteTransfer(false);
+ CutAndPasteTransfer output = new CutAndPasteTransfer();
StringBuffer sbuffer = new StringBuffer();
+ Vector localCache = new Vector();
+
+ Vector getUniprotEntries(File file)
+ {
+
+ UniprotFile uni = new UniprotFile();
+ try
+ {
+ // 1. Load the mapping information from the file
+ Mapping map = new Mapping(uni.getClass().getClassLoader());
+ java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml");
+ map.loadMapping(url);
+
+ // 2. Unmarshal the data
+ Unmarshaller unmar = new Unmarshaller();
+ unmar.setIgnoreExtraElements(true);
+ unmar.setMapping(map);
+ uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
+ localCache.addAll( uni.getUniprotEntries() );
+
+ // 3. (disabled) marshal the entries back out to the UNIPROT_CACHE file
+ // Marshaller marshaller = new Marshaller(
+ // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))
+ // );
+ // marshaller.setMapping(map);
+ // marshaller.marshal(uni);
+
+ }
+ catch (Exception e)
+ {
+ System.out.println("Error getUniprotEntries() "+e);
+ // e.printStackTrace();
+ // if(!updateLocalCache)
+ // file.delete();
+
+ }
+ return uni.getUniprotEntries();
+ }
+
+ /**
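+ * Creates a new SequenceFeatureFetcher and starts the fetch on a new thread.
+ *
+ * @param align alignment whose sequences are looked up
+ * @param ap alignment panel that is repainted when the fetch finishes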
+ */
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
unknownSequences = new ArrayList();
this.align = align;
this.ap = ap;
+
Thread thread = new Thread(this);
thread.start();
}
+ /**
+ * Requests UniProt XML from EBI for the sequence names, at most 50 ids per request.
+ */
public void run()
-{
-
- String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
+ {
+ try
+ {
+ int seqIndex = 0;
+ Vector sequences = align.getSequences();
- RandomAccessFile out = null;
+ while (seqIndex < sequences.size())
+ {
+ Vector ids = new Vector();
- try{
- if (cache == null)
- {
- jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
- cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
- }
+ for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
+ seqIndex++, i++)
+ {
+ SequenceI sequence = (SequenceI) sequences.get(seqIndex);
+ ids.add(sequence.getName());
+ unknownSequences.add(sequence.getName());
+ }
+ ///////////////////////////////////
+ ///READ FROM EBI
+ if (ids.size() > 0)
+ {
+ StringBuffer remainingIds = new StringBuffer("uniprot:");
+ for (int i = 0; i < ids.size(); i++)
+ {
+ remainingIds.append(ids.get(i) + ";");
+ }
+ EBIFetchClient ebi = new EBIFetchClient();
+ File file = ebi.fetchDataAsFile(remainingIds.toString(),
+ "xml", null);
- File test = new File(cache);
- if( !test.exists() )
- {
- out = new RandomAccessFile(cache, "rw");
- out.writeBytes("\n");
- out.writeBytes("\n");
+ if (file != null)
+ {
+ ReadUniprotFile(file, align, ids);
+ }
+ }
+ }
}
- else
+ catch (Exception ex)
{
- out = new RandomAccessFile(cache, "rw");
- // open exisiting cache and remove from the end
- long lastLine = 0;
- String data;
- while ( (data = out.readLine()) != null)
- {
- if (data.indexOf("") > -1)
- lastLine = out.getFilePointer();
-
- }
- out.seek(lastLine);
+ ex.printStackTrace();
}
- int seqIndex = 0;
- Vector sequences = align.getSequences();
-
- while (seqIndex < sequences.size())
+ if (sbuffer.length() > 0)
{
- ArrayList ids = new ArrayList();
- for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
- {
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);
- ids.add(sequence.getName());
- }
-
- tryLocalCacheFirst(ids, align);
-
- if (ids.size() > 0)
- {
- StringBuffer remainingIds = new StringBuffer("uniprot:");
- for (int i = 0; i < ids.size(); i++)
- remainingIds.append(ids.get(i) + ";");
-
- EBIFetchClient ebi = new EBIFetchClient();
- String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
-
- if(result!=null)
- ReadUniprotFile(result, out, align);
- }
-
+ output.setText(
+ "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
+ "altered, most likely the start/end residue will have been updated.\n" +
+ "Save your alignment to maintain the updated id.\n\n" +
+ sbuffer.toString());
+ Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
}
- if (out != null)
+ if (unknownSequences.size() > 0)
{
- out.writeBytes("\n");
- out.close();
+ new WSWUBlastClient(ap, align, unknownSequences);
}
- }catch(Exception ex){ex.printStackTrace();}
+ else
+ ((Alignment)align).featuresAdded = true;
- ap.repaint();
- findMissingIds(align);
- if(sbuffer.length()>0)
- {
- output.formatForOutput();
- outputFrame.setContentPane(output);
- output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
- +"altered, most likely the start/end residue will have been updated.\n"
- +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
- Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300);
+ ap.repaint();
}
- if(unknownSequences.size()>0)
+ /**
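+ * Applies the entries parsed from a UniProt XML file to the matching alignment sequences.
+ *
+ * @param file UniProt XML file to read; nothing is done if it does not exist
+ * @param align alignment whose sequences are matched by entry accession, then by entry name
+ * @param ids ids still awaiting a match; the name of each matched sequence is removed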
+ */
+ void ReadUniprotFile(File file, AlignmentI align, Vector ids)
{
- //ignore for now!!!!!!!!!!
- // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
- }
-
-}
+ if(!file.exists())
+ return;
-void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
-{
- SequenceI sequence = null;
- Vector features = null;
- String type, description, status, start, end, pdb = null;
+ SequenceI sequence = null;
+ // String pdb = null;
+ Vector entries = getUniprotEntries(file);
- for (int r = 0; r < result.length; r++)
- {
- if(sequence==null && result[r].indexOf("")>-1)
+ int i, iSize = entries==null?0:entries.size();
+ UniprotEntry entry;
+ for (i = 0; i < iSize; i++)
{
- long filePointer = 0;
-
- if(out!=null)
- try{
- filePointer=out.getFilePointer();
- out.writeBytes("\n");
- }catch(Exception ex){}
+ entry = (UniprotEntry) entries.elementAt(i);
+ String idmatch = entry.getAccession();
+ sequence = align.findName(idmatch);
- String seqName = parseElement( result[r], "" , out);
- sequence = align.findName( seqName ) ;
- if(sequence==null)
+ if (sequence == null)
{
- sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
- if(sequence!=null)
- {
- sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
- sequence.setName(seqName);
- }
+ // Sequence id may be the entry name rather than the accession
+ idmatch = entry.getName();
+ sequence = align.findName(idmatch);
}
- if(sequence==null)
+
+ if (sequence == null)
{
- sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
- sequence = align.findName( result[r] ) ;
-
- // this entry has been suggested by ebi.
- // doesn't match id in alignment file
- try { out.setLength(filePointer); } catch (Exception ex) {}
- // now skip to next entry
- while( result[r].indexOf("")==-1)
- r++;
+ continue;
}
- features = new Vector();
- type=""; start="0"; end="0"; description=""; status=""; pdb="";
+ ids.remove(sequence.getName());
+ unknownSequences.remove(sequence.getName());
- }
-
- if(sequence==null)
- continue;
-
- if( result[r].indexOf("-1)
- {
- pdb = parseValue( result[r], "value=" , out);
- sequence.setPDBId(pdb);
- }
-
- if(result[r].indexOf("feature type")>-1)
- {
- type = parseValue( result[r], "type=" , out);
- description = parseValue( result[r], "description=" , null );
- status = parseValue ( result[r], "status=", null);
-
- while( result[r].indexOf("position")==-1)
- {
- r++; //
- }
- // r++;
- if(result[r].indexOf("begin")>-1)
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[++r], "position=" , out);
- }
- else
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[r], "position=" , null);
- }
- int sstart = Integer.parseInt(start);
- int eend = Integer.parseInt(end);
- if(out!=null)
- try{ out.writeBytes("\n"); }catch(Exception ex){}
-
- SequenceFeature sf = new SequenceFeature(type,
- sstart,
- eend,
- description,
- status);
- features.add(sf);
- }
-
- if(result[r].indexOf("-1)
- {
- StringBuffer seqString = new StringBuffer();
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- while(result[++r].indexOf("")==-1)
- {
- seqString.append(result[r]);
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
- }
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- StringBuffer nonGapped = new StringBuffer();
- for (int i = 0; i < sequence.getSequence().length(); i++)
- {
- if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
- nonGapped.append(sequence.getCharAt(i));
- }
-
- int absStart = seqString.toString().indexOf(nonGapped.toString());
- if(absStart==-1)
- {
- unknownSequences.add(sequence.getName());
- features = null;
- sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
- continue;
- }
-
- int absEnd = absStart + nonGapped.toString().length();
- absStart+=1;
-
- if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
- sbuffer.append("Updated: "+sequence.getName()+" "+
- sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
-
-
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
-
- }
-
- if(result[r].indexOf("")>-1)
- {
- if(features!=null)
- sequence.setSequenceFeatures( features );
- features = null;
- sequence = null;
- if(out!=null)
- try{ out.writeBytes("\n"); }catch(Exception ex){}
-
- }
- }
-}
+ String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());
-void findMissingIds(AlignmentI align)
-{
- String data;
- ArrayList cachedIds = new ArrayList();
-
- try
- {
- BufferedReader in = new BufferedReader(
- new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
+ int absStart = entry.getUniprotSequence().getContent().indexOf(
+ nonGapped.toString());
- while ( (data = in.readLine()) != null)
- {
- if (data.indexOf("name") > -1)
+ if (absStart == -1)
{
- String name = parseElement(data, "", null);
- cachedIds.add(name);
- }
- }
- }
- catch (Exception ex)
- { ex.printStackTrace(); }
+ unknownSequences.add(sequence.getName());
+ sbuffer.append(sequence.getName() +
+ " SEQUENCE NOT %100 MATCH \n");
- for(int i=0; i-1)
+ if ( (absStart != sequence.getStart()) ||
+ (absEnd != sequence.getEnd()))
{
- String name = parseElement( data, "" , null) ;
- if(ids.contains( name ) )
- {
- cacheData.add("");
- cacheData.add(data);
- while( data.indexOf("")==-1)
- {
- data = in.readLine();
- cacheData.add(data);
- }
- cacheData.add(data);
-
- ids.remove( name );
- }
+ sbuffer.append("Updated: " + sequence.getName() + " " +
+ sequence.getStart() + "/" + sequence.getEnd() +
+ " to " + absStart + "/" + absEnd + "\n");
}
+
+ sequence.setSequenceFeatures(entry.getFeatures());
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);
}
}
- catch(Exception ex){ex.printStackTrace();}
-
- String [] localData = new String[cacheData.size()];
- cacheData.toArray( localData );
- if(localData!=null && localData.length>0)
- ReadUniprotFile(localData, null, align);
-}
-
-
-String parseValue(String line, String tag, RandomAccessFile out)
-{
- if(out!=null)
- try{ out.writeBytes(line+"\n"); }catch(Exception ex){}
-
-
- int index = line.indexOf(tag)+tag.length()+1;
- if(index==tag.length())
- return "";
-
- return line.substring( index, line.indexOf("\"", index+1) );
}
-String parseElement(String line, String tag, RandomAccessFile out)
-{
- if (out != null)
- try
- {
- out.writeBytes(line + "\n");
- }
- catch (Exception ex)
- {}
-
- int index = line.indexOf(tag) + tag.length();
- return line.substring(index, line.indexOf(""));
-
- }
-}