X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fgui%2FSequenceFetcher.java;h=2f92333a679a3e4a76ecd58f1d510b5fa153e303;hb=ea7870f9bf9ae81876afbe3fdd4d9bd9022b4014;hp=f4e289b05f4070d7406cca38c72157bc6a547ee1;hpb=451619e33c0a90c8130c7d79ffa38161af1c6e0f;p=jalview.git diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index f4e289b..2f92333 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,20 +18,29 @@ */ package jalview.gui; -import javax.swing.*; +import java.io.*; +import java.util.*; + import java.awt.*; import java.awt.event.*; -import jalview.io.EBIFetchClient; +import javax.swing.*; + import MCview.*; import jalview.datamodel.*; -import jalview.analysis.AlignSeq; +import jalview.datamodel.xdb.embl.*; import java.io.File; import jalview.io.*; -import java.util.*; +import jalview.ws.DBRefFetcher; +import jalview.ws.EBIFetchClient; + +import java.awt.Rectangle; +import java.awt.BorderLayout; +import java.awt.Dimension; public class SequenceFetcher - extends JPanel implements Runnable +extends JPanel implements Runnable { + jalview.ws.SequenceFetcher sfetch; JInternalFrame frame; AlignFrame alignFrame; StringBuffer result; @@ -39,7 +48,26 @@ public class SequenceFetcher public SequenceFetcher(AlignFrame af) { alignFrame = af; + sfetch = new jalview.ws.SequenceFetcher(); database.addItem(noDbSelected); + /* + * Dynamically generated database list + * will need a translation function from + * internal source to externally distinct names. + * UNIPROT and UP_NAME are identical DB sources, + * and should be collapsed. + * + + String dbs[] = sfetch.getSupportedDb(); + for (int i=0; i 0) { showErrorMessage(error); @@ -178,68 +214,133 @@ public class SequenceFetcher result = new StringBuffer(); if (database.getSelectedItem().equals("Uniprot")) { - getUniprotFile(textfield.getText()); + getUniprotFile(textArea.getText()); } else if (database.getSelectedItem().equals("EMBL") - || database.getSelectedItem().equals("EMBLCDS")) + || database.getSelectedItem().equals("EMBLCDS")) { - StringTokenizer st = new StringTokenizer(textfield.getText(), ";"); + String DBRefSource = database.getSelectedItem().equals("EMBLCDS") + ? jalview.datamodel.DBRefSource.EMBLCDS + : jalview.datamodel.DBRefSource.EMBL; + + StringTokenizer st = new StringTokenizer(textArea.getText(), ";"); + SequenceI[] seqs = null; while(st.hasMoreTokens()) { EBIFetchClient dbFetch = new EBIFetchClient(); - - String[] reply = dbFetch.fetchData( - database.getSelectedItem().toString().toLowerCase( - ) + ":" + st.nextToken(), - "fasta", "raw"); -// - if (reply != null) + String qry = database.getSelectedItem().toString().toLowerCase( + ) + ":" + st.nextToken(); + File reply = dbFetch.fetchDataAsFile( + qry, + "emblxml",null); + + jalview.datamodel.xdb.embl.EmblFile efile=null; + if (reply != null && reply.exists()) { - for (int i = 0; i < reply.length; i++) - result.append(reply[i] + "\n"); + efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply); } - } - - if(result!=null && result.length()>1) // arbitrary minimum length for a seuqence file - { - System.out.println(result.toString()); + if (efile!=null) { + for (Iterator i=efile.getEntries().iterator(); i.hasNext(); ) { + EmblEntry entry = (EmblEntry) i.next(); + SequenceI[] seqparts = entry.getSequences(false,true, DBRefSource); + if (seqparts!=null) { + SequenceI[] newseqs = null; + int si=0; + if (seqs==null) { + newseqs = new SequenceI[seqparts.length]; + } else { + newseqs = new SequenceI[seqs.length+seqparts.length]; + + for (;si0) { + if (parseResult(new Alignment(seqs), null, null)!=null) + { + result.append("# Successfully parsed the "+database.getSelectedItem()+" Queries into an Alignment"); + } } } else if (database.getSelectedItem().equals("PDB")) { - StringTokenizer qset = new StringTokenizer(textfield.getText(), ";"); + StringTokenizer qset = new StringTokenizer(textArea.getText(), ";"); String query; + SequenceI[] seqs = null; while (qset.hasMoreTokens() && ((query = qset.nextToken())!=null)) { - StringBuffer respart = getPDBFile(query.toUpperCase()); - if(respart!=null) - result.append(respart); + SequenceI[] seqparts = getPDBFile(query.toUpperCase()); + if (seqparts != null) + { + if (seqs == null) + { + seqs = seqparts; + } + else + { + SequenceI[] newseqs = new SequenceI[seqs.length+seqparts.length]; + int i=0; + for (; i < seqs.length; i++) + { + newseqs[i] = seqs[i]; + seqs[i] = null; + } + for (int j=0;j 0) + { + if (parseResult(new Alignment(seqs), null, null)!=null) + { + result.append( + "# Successfully parsed the PDB File Queries into an Alignment"); + } } - - - if (result.length()>0) - parseResult(result.toString(), null); } else if( database.getSelectedItem().equals("PFAM")) { - try{ + try + { result.append(new FastaFile( - "http://www.sanger.ac.uk/cgi-bin/Pfam/getalignment.pl?format=fal&acc=" - + textfield.getText().toUpperCase(), "URL").print() - ); + "http://www.sanger.ac.uk/cgi-bin/Pfam/getalignment.pl?format=fal&acc=" + + textArea.getText().toUpperCase(), "URL").print() + ); - if(result.length()>0) - parseResult( result.toString(), textfield.getText().toUpperCase() ); + if(result.length()>0) + { + parseResult( result.toString(), textArea.getText().toUpperCase() ); + } - }catch(java.io.IOException ex) - { result = null; } + } + catch (java.io.IOException ex) + { + result = null; + } } if (result == null || result.length() == 0) - showErrorMessage("Error retrieving " + textfield.getText() - + " from " + database.getSelectedItem()); + { + showErrorMessage("Error retrieving " + textArea.getText() + + " from " + database.getSelectedItem()); + } resetDialog(); return; @@ -277,16 +378,16 @@ public class SequenceFetcher if (entry.getProtein() != null) { - name.append(" " + entry.getProtein().getName().elementAt(0)); + name.append(" " + entry.getProtein().getName().elementAt(0)); } result.append(name + "\n" + entry.getUniprotSequence().getContent() + - "\n"); + "\n"); } //Then read in the features and apply them to the dataset - SequenceI[] sequence = parseResult(result.toString(), null); + Alignment al = parseResult(result.toString(), null); for (int i = 0; i < entries.size(); i++) { UniprotEntry entry = (UniprotEntry) entries.elementAt(i); @@ -296,7 +397,9 @@ public class SequenceFetcher { PDBEntry pdb = (PDBEntry) e.nextElement(); if (!pdb.getType().equals("PDB")) + { continue; + } onlyPdbEntries.addElement(pdb); } @@ -304,15 +407,16 @@ public class SequenceFetcher Enumeration en2 = entry.getAccession().elements(); while (en2.hasMoreElements()) { - sequence[i].getDatasetSequence().addDBRef(new DBRefEntry(DBRefSource.UNIPROT, - "0", - en2.nextElement().toString())); + al.getSequenceAt(i).getDatasetSequence().addDBRef(new DBRefEntry( + DBRefSource.UNIPROT, + "0", + en2.nextElement().toString())); } - sequence[i].getDatasetSequence().setPDBId(onlyPdbEntries); + al.getSequenceAt(i).getDatasetSequence().setPDBId(onlyPdbEntries); if (entry.getFeature() != null) { e = entry.getFeature().elements(); @@ -320,16 +424,16 @@ public class SequenceFetcher { SequenceFeature sf = (SequenceFeature) e.nextElement(); sf.setFeatureGroup("Uniprot"); - sequence[i].getDatasetSequence().addSequenceFeature( sf ); + al.getSequenceAt(i).getDatasetSequence().addSequenceFeature( sf ); } } } } } - StringBuffer getPDBFile(String id) + SequenceI[] getPDBFile(String id) { - StringBuffer result = new StringBuffer(); + Vector result = new Vector(); String chain = null; if (id.indexOf(":") > -1) { @@ -338,9 +442,12 @@ public class SequenceFetcher } EBIFetchClient ebi = new EBIFetchClient(); - String file = ebi.fetchDataAsFile("pdb:" + id, "pdb", "raw").getAbsolutePath(); + String file = ebi.fetchDataAsFile("pdb:" + id, "pdb", "raw"). + getAbsolutePath(); if (file == null) + { return null; + } try { PDBfile pdbfile = new PDBfile(file, jalview.io.AppletFormatAdapter.FILE); @@ -349,141 +456,137 @@ public class SequenceFetcher if (chain == null || ( (PDBChain) pdbfile.chains.elementAt(i)).id. toUpperCase().equals(chain)) + { + PDBChain pdbchain = (PDBChain) pdbfile.chains.elementAt(i); + // Get the Chain's Sequence - who's dataset includes any special features added from the PDB file + SequenceI sq = pdbchain.sequence; + // Specially formatted name for the PDB chain sequences retrieved from the PDB + sq.setName("PDB|"+id+"|"+sq.getName()); + // Might need to add more metadata to the PDBEntry object + // like below + /* + * PDBEntry entry = new PDBEntry(); + // Construct the PDBEntry + entry.setId(id); + if (entry.getProperty() == null) + entry.setProperty(new Hashtable()); + entry.getProperty().put("chains", + pdbchain.id + + "=" + sq.getStart() + + "-" + sq.getEnd()); + sq.getDatasetSequence().addPDBId(entry); + */ + // Add PDB DB Refs + // We make a DBRefEtntry because we have obtained the PDB file from a verifiable source + // JBPNote - PDB DBRefEntry should also carry the chain and mapping information + DBRefEntry dbentry = new DBRefEntry(jalview.datamodel.DBRefSource.PDB, + "0", id + pdbchain.id); + sq.addDBRef(dbentry); + // and add seuqence to the retrieved set + result.addElement(sq.deriveSequence()); + } + } - result.append("\n>PDB|" + id + "|" + - ( (PDBChain) pdbfile.chains.elementAt(i)).sequence. - getName() + - "\n" - + - ( (PDBChain) pdbfile.chains.elementAt(i)).sequence. - getSequenceAsString()); + if (result.size() < 1) + { + throw new Exception("WsDBFetch for PDB id resulted in zero result size"); } } catch (Exception ex) // Problem parsing PDB file { jalview.bin.Cache.log.warn("Exception when retrieving " + - textfield.getText() + " from " + - database.getSelectedItem(), ex); + textArea.getText() + " from " + + database.getSelectedItem(), ex); return null; } - return result; - } - SequenceI[] parseResult(String result, String title) + SequenceI[] results = new SequenceI[result.size()]; + for (int i = 0, j = result.size(); i < j; i++) + { + results[i] = (SequenceI) result.elementAt(i); + result.setElementAt(null,i); + } + return results; + } + Alignment parseResult(String result, String title) { String format = new IdentifyFile().Identify(result, "Paste"); - SequenceI[] sequences = null; - + Alignment sequences = null; if (FormatAdapter.isValidFormat(format)) { sequences = null; - try{ sequences = new FormatAdapter().readFile(result.toString(), "Paste", - format);} - catch(Exception ex){} + try + { + sequences = new FormatAdapter().readFile(result.toString(), "Paste", + format); + } + catch (Exception ex) + {} + + if (sequences!=null) + { + return parseResult(sequences, title, format); + } + } + else + { + showErrorMessage("Error retrieving " + textArea.getText() + + " from " + database.getSelectedItem()); + } + + return null; + } + + Alignment parseResult(Alignment al, String title, String currentFileFormat) + { - if (sequences != null && sequences.length > 0) + if (al != null && al.getHeight() > 0) + { + if (alignFrame == null) { - if (alignFrame == null) + AlignFrame af = new AlignFrame(al, + AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + if (currentFileFormat!=null) { - AlignFrame af = new AlignFrame(new Alignment(sequences), - AlignFrame.DEFAULT_WIDTH, - AlignFrame.DEFAULT_HEIGHT -); - af.currentFileFormat = format; - if(title==null) - title = "Retrieved from " + database.getSelectedItem(); - Desktop.addInternalFrame(af, - title, - AlignFrame.DEFAULT_WIDTH, - AlignFrame.DEFAULT_HEIGHT); - af.statusBar.setText("Successfully pasted alignment file"); - - try - { - af.setMaximum(jalview.bin.Cache.getDefault("SHOW_FULLSCREEN", false)); - } - catch (Exception ex) - {} + af.currentFileFormat = currentFileFormat; // WHAT IS THE DEFAULT FORMAT FOR NON-FormatAdapter Sourced Alignments? } - else - { - for (int i = 0; i < sequences.length; i++) - { - alignFrame.viewport.alignment.addSequence(sequences[i]); - - //////////////////////////// - //Dataset needs extension; - ///////////////////////////// - Sequence ds = new Sequence(sequences[i].getName(), - AlignSeq.extractGaps("-. ", - sequences[i].getSequenceAsString()), - sequences[i].getStart(), - sequences[i].getEnd()); - sequences[i].setDatasetSequence(ds); - alignFrame.viewport.alignment.getDataset().addSequence(ds); - } - alignFrame.viewport.setEndSeq(alignFrame.viewport.alignment. - getHeight()); - alignFrame.viewport.alignment.getWidth(); - alignFrame.viewport.firePropertyChange("alignment", null, - alignFrame.viewport. - getAlignment().getSequences()); + if(title==null) + { + title = "Retrieved from " + database.getSelectedItem(); } - if (database.getSelectedItem().equals("PDB")) + Desktop.addInternalFrame(af, + title, + AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + + af.statusBar.setText("Successfully pasted alignment file"); + + try { - // Parse out the ids from the structured names - boolean errors = false; - for (int i = 0; i < sequences.length; i++) - { - PDBEntry entry = new PDBEntry(); - com.stevesoft.pat.Regex idbits = new com.stevesoft.pat.Regex( - "PDB\\|([0-9A-z]{4})\\|(.)"); - if (idbits.search(sequences[i].getName())) - { - String pdbid = idbits.substring(1); - String pdbccode = idbits.substring(2); - // Construct the PDBEntry - entry.setId(pdbid); - if (entry.getProperty() == null) - entry.setProperty(new Hashtable()); - entry.getProperty().put("chains", - pdbccode - + "=" + sequences[i].getStart() - + "-" + sequences[i].getEnd()); - sequences[i].getDatasetSequence().addPDBId(entry); - - // We make a DBRefEtntry because we have obtained the PDB file from a verifiable source - // JBPNote - PDB DBRefEntry should also carry the chain and mapping information - DBRefEntry dbentry = new DBRefEntry(jalview.datamodel.DBRefSource.PDB,"0",pdbid); - sequences[i].getDatasetSequence().addDBRef(dbentry); - } - else - { - // don't add an entry for this chain, but this is probably a bug - // that the user should know about. - jalview.bin.Cache.log.warn( - "No PDBEntry constructed for sequence " + i + " : " + - sequences[i].getName()); - errors = true; - } - } - if (errors) - jalview.bin.Cache.log.warn( - "Query string that resulted in PDBEntry construction failure was :\n" + - textfield.getText()); + af.setMaximum(jalview.bin.Cache.getDefault("SHOW_FULLSCREEN", false)); } - + catch (Exception ex) + {} } else - showErrorMessage("Error retrieving " + textfield.getText() - + " from " + database.getSelectedItem()); + { + for (int i = 0; i < al.getHeight(); i++) + { + alignFrame.viewport.alignment.addSequence(al.getSequenceAt(i)); // this also creates dataset sequence entries + } + alignFrame.viewport.setEndSeq(alignFrame.viewport.alignment. + getHeight()); + alignFrame.viewport.alignment.getWidth(); + alignFrame.viewport.firePropertyChange("alignment", null, + alignFrame.viewport. + getAlignment().getSequences()); + } } - - return sequences; - + return al; } void showErrorMessage(final String error) @@ -494,8 +597,8 @@ public class SequenceFetcher public void run() { JOptionPane.showInternalMessageDialog(Desktop.desktop, - error, "Error Retrieving Data", - JOptionPane.WARNING_MESSAGE); + error, "Error Retrieving Data", + JOptionPane.WARNING_MESSAGE); } }); }