X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=6d218f434515fa862b5ec3356704d2ae9f97e301;hb=424a0faf266f649090f17dc65246cdfb4111217e;hp=757bb57dbaac990bcf6fb3b16d476e338d4941bf;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git

diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java
index 757bb57..6d218f4 100755
--- a/src/jalview/io/FastaFile.java
+++ b/src/jalview/io/FastaFile.java
@@ -18,14 +18,10 @@
 */
 package jalview.io;
 
-import jalview.analysis.*;
-
 import jalview.datamodel.*;
 
 import java.io.*;
 
-import java.util.*;
-
 
 /**
  * DOCUMENT ME!
@@ -72,154 +68,58 @@ public class FastaFile extends AlignFile
      */
     public void parse() throws IOException
     {
-        String id = "";
-        StringBuffer seq = new StringBuffer();
+        StringBuffer sb = new StringBuffer();
         int count = 0;
 
-        int sstart = 0;
-        int send = 0;
-
         String line;
+        Sequence seq = null;
 
         while ((line = nextLine()) != null)
         {
+            line = line.trim();
             if (line.length() > 0)
             {
-                // Do we have an id line?
-                // JBPNote - this code needs to be standardised to EBI/whatever for the
-                // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
-                if (line.substring(0, 1).equals(">"))
+                if (line.charAt(0)=='>')
                 {
                     if (count != 0)
                     {
-                        if (sstart != 0)
-                        {
-                            seqs.addElement(new Sequence(id, seq.toString(),
-                                    sstart, send));
-                        }
-                        else
-                        {
-                            seqs.addElement(new Sequence(id, seq.toString(), 1,
-                                    seq.length()));
-                        }
+                      if (!isValidProteinSequence(sb.toString()))
+                      {
+                        throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS
+                                              +" : "+seq.getName()
+                                              +" : "+invalidCharacter);
+                      }
+
+                       seq.setSequence(sb.toString());
+                       seqs.addElement(seq);
                     }
 
-                    count++;
-
-                    StringTokenizer str = new StringTokenizer(line, " ");
-
-                    id = str.nextToken();
-                    id = id.substring(1);
-
-                    com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
-                            "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");
-
-                    // JBPNote At the moment - we don't get rid of the friendly names but this
-                    // behaviour is probably wrong in the long run.
-                    if (dbId.search(id))
-                    {
-                        String dbid = dbId.stringMatched(1);
-                        String idname = dbId.stringMatched(2);
-
-                        if ((idname.length() > 0) &&
-                                (idname.indexOf("_") > -1))
-                        {
-                            id = idname; // use the friendly name - apparently no dbid
-                        }
-                        else if (dbid.length() > 1)
-                        {
-                            id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
-                        }
-                    }
-
-                    if (id.indexOf("/") > 0)
-                    {
-                        StringTokenizer st = new StringTokenizer(id, "/");
-
-                        if (st.countTokens() == 2)
-                        {
-                            id = st.nextToken();
-
-                            String tmp = st.nextToken();
+                    seq = parseId(line.substring(1));
 
-                            st = new StringTokenizer(tmp, "-");
-
-                            if (st.countTokens() == 2)
-                            {
-                                sstart = Integer.valueOf(st.nextToken())
-                                                .intValue();
-                                send = Integer.valueOf(st.nextToken()).intValue();
-                            }
-                        }
-                    }
-
-                    seq = new StringBuffer();
+                    count++;
+                    sb = new StringBuffer();
                 }
                 else
                 {
-                    seq = seq.append(line);
+                    sb.append(line);
                 }
             }
         }
 
         if (count > 0)
         {
-            if (!isValidProteinSequence(seq.toString().toUpperCase()))
+            if (!isValidProteinSequence(sb.toString()))
             {
-                throw new IOException("Invalid protein sequence");
+                throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS
+                                      +" : "+seq.getName()
+                                      +" : "+invalidCharacter);
             }
 
-            if (sstart != 0)
-            {
-                seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
-                        sstart, send));
-            }
-            else
-            {
-                seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
-                        1, seq.length()));
-            }
+            seq.setSequence(sb.toString());
+            seqs.addElement(seq);
         }
     }
 
-    /**
-     * DOCUMENT ME!
-     *
-     * @param s DOCUMENT ME!
-     *
-     * @return DOCUMENT ME!
-     */
-    public static String print(SequenceI[] s)
-    {
-        return print(s, 72);
-    }
-
-    /**
-     * DOCUMENT ME!
-     *
-     * @param s DOCUMENT ME!
-     * @param len DOCUMENT ME!
-     *
-     * @return DOCUMENT ME!
-     */
-    public static String print(SequenceI[] s, int len)
-    {
-        return print(s, len, true);
-    }
-
-    /**
-     * DOCUMENT ME!
-     *
-     * @param s DOCUMENT ME!
-     * @param len DOCUMENT ME!
-     * @param gaps DOCUMENT ME!
-     *
-     * @return DOCUMENT ME!
-     */
-    public static String print(SequenceI[] s, int len, boolean gaps)
-    {
-        return print(s, len, gaps, true);
-    }
 
     /**
      * DOCUMENT ME!
@@ -231,43 +131,34 @@ public class FastaFile extends AlignFile
      *
      * @return DOCUMENT ME!
      */
-    public static String print(SequenceI[] s, int len, boolean gaps,
-        boolean displayId)
+    public String print(SequenceI[] s)
     {
+        int len = 72;
         StringBuffer out = new StringBuffer();
         int i = 0;
 
         while ((i < s.length) && (s[i] != null))
         {
-            String seq = "";
-
-            if (gaps)
-            {
-                seq = s[i].getSequence();
-            }
-            else
-            {
-                seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
-            }
+            out.append(">" + printId(s[i]));
+            if(s[i].getDescription()!=null)
+              out.append(" "+s[i].getDescription());
 
-            // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
-            out.append(">" +
-                ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
+            out.append("\n");
 
-            int nochunks = (seq.length() / len) + 1;
+            int nochunks = (s[i].getLength() / len) + 1;
 
             for (int j = 0; j < nochunks; j++)
             {
                 int start = j * len;
                 int end = start + len;
 
-                if (end < seq.length())
+                if (end < s[i].getLength())
                 {
-                    out.append(seq.substring(start, end) + "\n");
+                    out.append(s[i].getSequence(start, end) + "\n");
                 }
-                else if (start < seq.length())
+                else if (start < s[i].getLength())
                 {
-                    out.append(seq.substring(start) + "\n");
+                    out.append(s[i].getSequence(start, s[i].getLength()) + "\n");
                 }
             }