retrieve and parse EMBL records to view exons.
author jprocter <Jim Procter>
Mon, 19 Mar 2007 15:40:42 +0000 (15:40 +0000)
committer jprocter <Jim Procter>
Mon, 19 Mar 2007 15:40:42 +0000 (15:40 +0000)
src/jalview/gui/SequenceFetcher.java

index ce366c8..6687020 100755 (executable)
@@ -27,10 +27,13 @@ import javax.swing.*;
 
 import MCview.*;
 import jalview.datamodel.*;
+import jalview.datamodel.xdb.embl.*;
+import jalview.analysis.*;
+import java.io.File;
 import jalview.io.*;
 
 public class SequenceFetcher
-    extends JPanel implements Runnable
+extends JPanel implements Runnable
 {
   JInternalFrame frame;
   AlignFrame alignFrame;
@@ -70,11 +73,11 @@ public class SequenceFetcher
   private String getFrameTitle()
   {
     return ( (alignFrame == null) ? "New " : "Additional ") +
-        "Sequence Fetcher";
+    "Sequence Fetcher";
   }
 
   private void jbInit()
-      throws Exception
+  throws Exception
   {
     this.setLayout(gridBagLayout1);
 
@@ -83,7 +86,7 @@ public class SequenceFetcher
     database.setPreferredSize(new Dimension(160, 21));
     jLabel1.setFont(new java.awt.Font("Verdana", Font.ITALIC, 11));
     jLabel1.setText(
-        "Separate multiple accession ids with semi colon \";\"");
+    "Separate multiple accession ids with semi colon \";\"");
     ok.setText("OK");
     ok.addActionListener(new ActionListener()
     {
@@ -111,21 +114,21 @@ public class SequenceFetcher
     jPanel1.add(ok);
     jPanel1.add(close);
     this.add(jLabel1, new GridBagConstraints(0, 0, 2, 1, 0.0, 0.0
-                                             , GridBagConstraints.WEST,
-                                             GridBagConstraints.NONE,
-                                             new Insets(7, 4, 0, 6), 77, 6));
+        , GridBagConstraints.WEST,
+        GridBagConstraints.NONE,
+        new Insets(7, 4, 0, 6), 77, 6));
     this.add(jPanel1, new GridBagConstraints(0, 2, 2, 1, 1.0, 1.0
-                                             , GridBagConstraints.WEST,
-                                             GridBagConstraints.BOTH,
-                                             new Insets(7, -2, 7, 12), 241, -2));
+        , GridBagConstraints.WEST,
+        GridBagConstraints.BOTH,
+        new Insets(7, -2, 7, 12), 241, -2));
     this.add(database, new GridBagConstraints(0, 1, 1, 1, 1.0, 0.0
-                                              , GridBagConstraints.WEST,
-                                              GridBagConstraints.NONE,
-                                              new Insets(0, 4, 0, 0), 1, 0));
+        , GridBagConstraints.WEST,
+        GridBagConstraints.NONE,
+        new Insets(0, 4, 0, 0), 1, 0));
     this.add(textfield, new GridBagConstraints(1, 1, 1, 1, 1.0, 0.0
-                                               , GridBagConstraints.CENTER,
-                                               GridBagConstraints.NONE,
-                                               new Insets(0, 0, 0, 6), 211, 1));
+        , GridBagConstraints.CENTER,
+        GridBagConstraints.NONE,
+        new Insets(0, 0, 0, 6), 211, 1));
   }
 
   JComboBox database = new JComboBox();
@@ -190,40 +193,67 @@ public class SequenceFetcher
       getUniprotFile(textfield.getText());
     }
     else if (database.getSelectedItem().equals("EMBL")
-             || database.getSelectedItem().equals("EMBLCDS"))
+        || database.getSelectedItem().equals("EMBLCDS"))
     {
+      String DBRefSource = database.getSelectedItem().equals("EMBLCDS") 
+      ? jalview.datamodel.DBRefSource.EMBLCDS
+          : jalview.datamodel.DBRefSource.EMBL;
+
       StringTokenizer st = new StringTokenizer(textfield.getText(), ";");
-      while (st.hasMoreTokens())
+      SequenceI[] seqs = null;
+      while(st.hasMoreTokens())
       {
         EBIFetchClient dbFetch = new EBIFetchClient();
 
-        String[] reply = dbFetch.fetchData(
+        File reply = dbFetch.fetchDataAsFile(
             database.getSelectedItem().toString().toLowerCase(
             ) + ":" + st.nextToken(),
-            "fasta", "raw");
-//
-        if (reply != null)
+            "emblxml",null);
+
+        jalview.datamodel.xdb.embl.EmblFile efile=null;
+        if (reply != null && reply.exists())
         {
-          for (int i = 0; i < reply.length; i++)
-          {
-            result.append(reply[i] + "\n");
-          }
+          efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply); 
         }
-      }
-
-      if (result != null && result.length() > 1) // arbitrary minimum length for a seuqence file
-      {
-        System.out.println(result.toString());
+        if (efile!=null) {
+          for (Iterator i=efile.getEntries().iterator(); i.hasNext(); ) {
+            EmblEntry entry = (EmblEntry) i.next();
+            SequenceI[] seqparts = entry.getSequences(false,true, DBRefSource);
+            if (seqparts!=null) {
+              SequenceI[] newseqs = null;
+              int si=0;
+              if (seqs==null) {
+                newseqs = new SequenceI[seqparts.length];
+              } else {
+                newseqs  = new SequenceI[seqs.length+seqparts.length];
+
+                for (;si<seqs.length; si++) {
+                  newseqs[si] = seqs[si];
+                  seqs[si] = null;
+                }
+              }
+              for (int j=0;j<seqparts.length; si++, j++) {
+                newseqs[si] = seqparts[j].deriveSequence(); // place DBReferences on dataset and refer
+              }
+              seqs=newseqs;
 
-        parseResult(result.toString(), null);
+            }
+          }
+        } else {
+          result=null;
+        }
       }
+      if (seqs!=null && seqs.length>0) {
+        if (parseResult(new Alignment(seqs), null, null)!=null)
+          result.append("# Successfully parsed the "+database.getSelectedItem()+" Queries into an Alignment");
+      }    
     }
     else if (database.getSelectedItem().equals("PDB"))
     {
       StringTokenizer qset = new StringTokenizer(textfield.getText(), ";");
       String query;
       SequenceI[] seqs = null;
-      while (qset.hasMoreTokens() && ( (query = qset.nextToken()) != null))
+      while (qset.hasMoreTokens() && ((query = qset.nextToken())!=null))
       {
         SequenceI[] seqparts = getPDBFile(query.toUpperCase());
         if (seqparts != null)
@@ -234,43 +264,43 @@ public class SequenceFetcher
           }
           else
           {
-            SequenceI[] newseqs = new SequenceI[seqs.length + seqparts.length];
-            int i = 0;
+            SequenceI[] newseqs = new SequenceI[seqs.length+seqparts.length];
+            int i=0;
             for (; i < seqs.length; i++)
             {
               newseqs[i] = seqs[i];
               seqs[i] = null;
             }
-            for (int j = 0; j < seqparts.length; i++, j++)
+            for (int j=0;j<seqparts.length; i++, j++)
             {
               newseqs[i] = seqparts[j];
             }
-            seqs = newseqs;
+            seqs=newseqs;
           }
-          result.append("# Success for " + query.toUpperCase() + "\n");
+          result.append("# Success for "+query.toUpperCase()+"\n");
         }
       }
       if (seqs != null && seqs.length > 0)
       {
-        if (parseResult(new Alignment(seqs), null, null) != null)
+        if (parseResult(new Alignment(seqs), null, null)!=null)
         {
           result.append(
-              "# Successfully parsed the PDB File Queries into an Alignment");
+          "# Successfully parsed the PDB File Queries into an Alignment");
         }
       }
     }
-    else if (database.getSelectedItem().equals("PFAM"))
+    else if( database.getSelectedItem().equals("PFAM"))
     {
       try
       {
         result.append(new FastaFile(
             "http://www.sanger.ac.uk/cgi-bin/Pfam/getalignment.pl?format=fal&acc="
-            + textfield.getText().toUpperCase(), "URL").print()
-            );
+            +  textfield.getText().toUpperCase(), "URL").print()
+        );
 
-        if (result.length() > 0)
+        if(result.length()>0)
         {
-          parseResult(result.toString(), textfield.getText().toUpperCase());
+          parseResult( result.toString(), textfield.getText().toUpperCase() );
         }
 
       }
@@ -283,7 +313,7 @@ public class SequenceFetcher
     if (result == null || result.length() == 0)
     {
       showErrorMessage("Error retrieving " + textfield.getText()
-                       + " from " + database.getSelectedItem());
+          + " from " + database.getSelectedItem());
     }
 
     resetDialog();
@@ -326,7 +356,7 @@ public class SequenceFetcher
         }
 
         result.append(name + "\n" + entry.getUniprotSequence().getContent() +
-                      "\n");
+        "\n");
 
       }
 
@@ -357,6 +387,9 @@ public class SequenceFetcher
               en2.nextElement().toString()));
         }
 
+
+
+
         al.getSequenceAt(i).getDatasetSequence().setPDBId(onlyPdbEntries);
         if (entry.getFeature() != null)
         {
@@ -365,7 +398,7 @@ public class SequenceFetcher
           {
             SequenceFeature sf = (SequenceFeature) e.nextElement();
             sf.setFeatureGroup("Uniprot");
-            al.getSequenceAt(i).getDatasetSequence().addSequenceFeature(sf);
+            al.getSequenceAt(i).getDatasetSequence().addSequenceFeature( sf );
           }
         }
       }
@@ -384,7 +417,7 @@ public class SequenceFetcher
 
     EBIFetchClient ebi = new EBIFetchClient();
     String file = ebi.fetchDataAsFile("pdb:" + id, "pdb", "raw").
-        getAbsolutePath();
+    getAbsolutePath();
     if (file == null)
     {
       return null;
@@ -402,26 +435,26 @@ public class SequenceFetcher
           // Get the Chain's Sequence - who's dataset includes any special features added from the PDB file
           SequenceI sq = pdbchain.sequence;
           // Specially formatted name for the PDB chain sequences retrieved from the PDB
-          sq.setName("PDB|" + id + "|" + sq.getName());
+          sq.setName("PDB|"+id+"|"+sq.getName());
           // Might need to add more metadata to the PDBEntry object
           // like below
           /*
            * PDBEntry entry = new PDBEntry();
-                       // Construct the PDBEntry
-                       entry.setId(id);
-                       if (entry.getProperty() == null)
-              entry.setProperty(new Hashtable());
-                       entry.getProperty().put("chains",
-                      pdbchain.id
-                      + "=" + sq.getStart()
-                      + "-" + sq.getEnd());
-                       sq.getDatasetSequence().addPDBId(entry);
+            // Construct the PDBEntry
+            entry.setId(id);
+            if (entry.getProperty() == null)
+                entry.setProperty(new Hashtable());
+            entry.getProperty().put("chains",
+                        pdbchain.id
+                        + "=" + sq.getStart()
+                        + "-" + sq.getEnd()); 
+            sq.getDatasetSequence().addPDBId(entry);
            */
           // Add PDB DB Refs
           // We make a DBRefEtntry because we have obtained the PDB file from a verifiable source
           // JBPNote - PDB DBRefEntry should also carry the chain and mapping information
           DBRefEntry dbentry = new DBRefEntry(jalview.datamodel.DBRefSource.PDB,
-                                              "0", id + pdbchain.id);
+              "0", id + pdbchain.id);
           sq.addDBRef(dbentry);
           // and add seuqence to the retrieved set
           result.addElement(sq.deriveSequence());
@@ -431,19 +464,18 @@ public class SequenceFetcher
     catch (Exception ex) // Problem parsing PDB file
     {
       jalview.bin.Cache.log.warn("Exception when retrieving " +
-                                 textfield.getText() + " from " +
-                                 database.getSelectedItem(), ex);
+          textfield.getText() + " from " +
+          database.getSelectedItem(), ex);
       return null;
     }
     SequenceI[] results = new SequenceI[result.size()];
     for (int i = 0, j = result.size(); i < j; i++)
     {
       results[i] = (SequenceI) result.elementAt(i);
-      result.setElementAt(null, i);
+      result.setElementAt(null,i);
     }
     return results;
   }
-
   Alignment parseResult(String result, String title)
   {
     String format = new IdentifyFile().Identify(result, "Paste");
@@ -454,12 +486,12 @@ public class SequenceFetcher
       try
       {
         sequences = new FormatAdapter().readFile(result.toString(), "Paste",
-                                                 format);
+            format);
       }
       catch (Exception ex)
       {}
 
-      if (sequences != null)
+      if (sequences!=null)
       {
         return parseResult(sequences, title, format);
       }
@@ -467,7 +499,7 @@ public class SequenceFetcher
     else
     {
       showErrorMessage("Error retrieving " + textfield.getText()
-                       + " from " + database.getSelectedItem());
+          + " from " + database.getSelectedItem());
     }
 
     return null;
@@ -481,22 +513,22 @@ public class SequenceFetcher
       if (alignFrame == null)
       {
         AlignFrame af = new AlignFrame(al,
-                                       AlignFrame.DEFAULT_WIDTH,
-                                       AlignFrame.DEFAULT_HEIGHT);
-        if (currentFileFormat != null)
+            AlignFrame.DEFAULT_WIDTH,
+            AlignFrame.DEFAULT_HEIGHT);
+        if (currentFileFormat!=null)
         {
           af.currentFileFormat = currentFileFormat; // WHAT IS THE DEFAULT FORMAT FOR NON-FormatAdapter Sourced Alignments?
         }
 
-        if (title == null)
+        if(title==null)
         {
           title = "Retrieved from " + database.getSelectedItem();
         }
 
         Desktop.addInternalFrame(af,
-                                 title,
-                                 AlignFrame.DEFAULT_WIDTH,
-                                 AlignFrame.DEFAULT_HEIGHT);
+            title,
+            AlignFrame.DEFAULT_WIDTH,
+            AlignFrame.DEFAULT_HEIGHT);
 
         af.statusBar.setText("Successfully pasted alignment file");
 
@@ -514,11 +546,11 @@ public class SequenceFetcher
           alignFrame.viewport.alignment.addSequence(al.getSequenceAt(i)); // this also creates dataset sequence entries
         }
         alignFrame.viewport.setEndSeq(alignFrame.viewport.alignment.
-                                      getHeight());
+            getHeight());
         alignFrame.viewport.alignment.getWidth();
         alignFrame.viewport.firePropertyChange("alignment", null,
-                                               alignFrame.viewport.
-                                               getAlignment().getSequences());
+            alignFrame.viewport.
+            getAlignment().getSequences());
       }
     }
     return al;
@@ -532,9 +564,10 @@ public class SequenceFetcher
       public void run()
       {
         JOptionPane.showInternalMessageDialog(Desktop.desktop,
-                                              error, "Error Retrieving Data",
-                                              JOptionPane.WARNING_MESSAGE);
+            error, "Error Retrieving Data",
+            JOptionPane.WARNING_MESSAGE);
       }
     });
   }
 }
+