X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=adfa96711bdde05cf30160b74933c1b84f8a11e5;hb=ba0711d9cab4854f27589fc58ef3f0fb4cba3908;hp=4c1ba9ee3ec25e575de461095c6066f969bce4e9;hpb=6cbe1876d4a5fdd7d5b73e11bf2468fe4e75ce99;p=jalview.git

diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java
index 4c1ba9e..adfa967 100755
--- a/src/jalview/io/FastaFile.java
+++ b/src/jalview/io/FastaFile.java
@@ -1,148 +1,186 @@
+/*
+* Jalview - A Sequence Alignment Editor and Viewer
+* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+*/
 package jalview.io;
 
-import jalview.datamodel.*;
 import jalview.analysis.*;
 
+import jalview.datamodel.*;
+
 import java.io.*;
+
 import java.util.*;
 
+
 public class FastaFile extends AlignFile {
+    public FastaFile() { // Default constructor: the body is empty, nothing is set up here.
+    }
 
-  public FastaFile()
-  {}
+    public FastaFile(String inStr) { // Only forwards inStr to the AlignFile(String) superclass constructor.
+        super(inStr);
+    }
 
-  public FastaFile(String inStr) {
-    super(inStr);
-  }
+    public FastaFile(String inFile, String type) throws IOException { // Only forwards both arguments to AlignFile(String, String), which may throw IOException.
+        super(inFile, type);
+    }
 
-  public FastaFile(String inFile, String type) throws IOException {
-    super(inFile,type);
-  }
+    public void parse() throws IOException { // Reads lines via nextLine() and adds one Sequence to seqs per ">" header; throws IOException if the last record fails isValidProteinSequence.
+        String id = "";
+        StringBuffer seq = new StringBuffer();
+        int count = 0; // number of ">" header lines seen so far
+        boolean flag = false; // NOTE(review): never read anywhere in this method
+
+        int sstart = 0; // start/end taken from an "id/start-end" header; sstart == 0 means "no range parsed"
+        int send = 0; // NOTE(review): sstart/send are never reset to 0, so a later header without a range reuses the values of an earlier one
+
+        String line;
+
+        while ((line = nextLine()) != null) {
+            if (line.length() > 0) {
+                // A line whose first character is ">" is an id (header) line; any other non-empty line is sequence data.
+                // JBPNote - this code needs to be standardised to EBI/whatever for the
+                // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
+
+                if (line.substring(0, 1).equals(">")) {
+                    if (count != 0) { // a previous record is pending: store it before starting the new one
+                        if (sstart != 0) {
+                            seqs.addElement(new Sequence(id,
+                                    seq.toString().toUpperCase(), sstart, send));
+                        } else { // no range parsed: number the residues 1..length
+                            seqs.addElement(new Sequence(id,
+                                    seq.toString().toUpperCase(), 1,
+                                    seq.length()));
+                        }
+                    }
+
+                    count++;
+
+                    StringTokenizer str = new StringTokenizer(line, " ");
+
+                    id = str.nextToken(); // first space-delimited token of the header ...
+                    id = id.substring(1); // ... without the leading ">"
+
+                    com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
+                            "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)"); // letters/hyphens (optionally split by one "/"), then |group1|group2
+                    // JBPNote - at the moment we do not get rid of the friendly names, but this
+                    // behaviour is probably wrong in the long run.
+                    if (dbId.search(id)) {
+                        String dbid = dbId.stringMatched(1); // presumably capture group 1 (the \w+ field) - confirm against the Regex API
+                        String idname = dbId.stringMatched(2); // presumably capture group 2 (everything after the second "|")
+                        if ( (idname.length() > 0) &&
+                             (idname.indexOf("_") > -1)) {
+                          id = idname; // use the friendly name (it contains "_") - apparently no dbid
+                        } else
+                        if (dbid.length()>1) {
+                            id = dbid; // ignore the friendly name - we lose the uniprot accession ID otherwise
+                        }
+                    }
+
+                    if (id.indexOf("/") > 0) { // possible "name/start-end" form
+                        StringTokenizer st = new StringTokenizer(id, "/");
+
+                        if (st.countTokens() == 2) { // only split when there is exactly one name part and one range part
+                            id = st.nextToken();
+
+                            String tmp = st.nextToken();
+
+                            st = new StringTokenizer(tmp, "-");
+
+                            if (st.countTokens() == 2) { // NOTE(review): Integer.valueOf throws NumberFormatException if either part is not numeric
+                                sstart = Integer.valueOf(st.nextToken())
+                                                .intValue();
+                                send = Integer.valueOf(st.nextToken()).intValue();
+                            }
+                        }
+                    }
+
+                    seq = new StringBuffer(); // start collecting residues for the new record
+                } else {
+                    seq = seq.append(line); // sequence data: lines are concatenated without separators
+                }
+            }
+        }
 
-  public void parse() throws IOException
-  {
+        if (count > 0) { // end of input: the last record has not been stored yet
+            if (!isValidProteinSequence(seq.toString().toUpperCase())) { // NOTE(review): only this final record is validated; records stored in the loop are not
+                throw new IOException("Invalid protein sequence");
+            }
 
-    String       id    = "";
-    StringBuffer seq   = new StringBuffer();
-    int          count = 0;
-    boolean      flag  = false;
+            if (sstart != 0) {
+                seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
+                        sstart, send));
+            } else { // no range parsed: number the residues 1..length
+                seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
+                        1, seq.length()));
+            }
+        }
+    }
 
-    int          sstart = 0;
-    int          send   = 0;
+    public static String print(SequenceI[] s) { // Convenience overload: same as print(s, 72).
+        return print(s, 72);
+    }
 
-    String line;
+    public static String print(SequenceI[] s, int len) { // Convenience overload: same as print(s, len, true).
+        return print(s, len, true);
+    }
 
-      while ((line = nextLine()) != null) {
+    public static String print(SequenceI[] s, int len, boolean gaps) { // Convenience overload: same as print(s, len, gaps, true).
+        return print(s, len, gaps, true);
+    }
 
-	if (line.length() > 0) {
+    public static String print(SequenceI[] s, int len, boolean gaps,
+        boolean displayId) { // Builds one ">" header line per sequence followed by its residues in lines of at most len characters.
+        StringBuffer out = new StringBuffer();
+        int i = 0;
 
-	  // Do we have an id line?
+        while ((i < s.length) && (s[i] != null)) { // stops at the end of the array or at the first null entry
+            String seq = "";
 
-	  if (line.substring(0,1).equals(">")) {
+            if (gaps) { // emit the sequence exactly as getSequence() returns it
+                seq = s[i].getSequence();
+            } else { // presumably strips the characters "-", "." and " " - confirm against AlignSeq.extractGaps
+                seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
+            }
 
-	    if (count != 0) {
-	      if (sstart != 0) {
-		seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
-	      } else {
-		seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
-	      }
-	    }
+            // The header used to always carry this suffix as well: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
+            out.append(">" +
+                ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
 
-	    count++;
+            int nochunks = (seq.length() / len) + 1; // +1 covers a trailing partial chunk; NOTE(review): len == 0 divides by zero here
 
-	    StringTokenizer str = new StringTokenizer(line," ");
+            for (int j = 0; j < nochunks; j++) {
+                int start = j * len;
+                int end = start + len;
 
-	    id = str.nextToken();
-	    id = id.substring(1);
-           if(id.indexOf("UniProt/Swiss-Prot")>-1)
-            {
-              id = id.substring(id.indexOf("UniProt/Swiss-Prot|") + 19);
-              if(id.indexOf("|")>-1)
-                id = id.substring(id.indexOf("|") + 1);
+                if (end < seq.length()) { // full-width chunk with more residues after it
+                    out.append(seq.substring(start, end) + "\n");
+                } else if (start < seq.length()) { // last chunk; when start == seq.length() nothing is written
+                    out.append(seq.substring(start) + "\n");
+                }
             }
 
-	    if (id.indexOf("/") > 0 ) {
-
-	      StringTokenizer st = new StringTokenizer(id,"/");
-	      if (st.countTokens() == 2) {
-		id = st.nextToken();
-		String tmp = st.nextToken();
-
-		st = new StringTokenizer(tmp,"-");
-
-		if (st.countTokens() == 2) {
-		  sstart = Integer.valueOf(st.nextToken()).intValue();
-		  send   = Integer.valueOf(st.nextToken()).intValue();
-		}
-	      }
-	    }
-
-	    seq = new StringBuffer();
-
-	  } else {
-	    seq = seq.append(line);
-	  }
-	}
-      }
-      if (count > 0) {
-
-        if(!isValidProteinSequence(seq.toString().toUpperCase()))
-          throw new IOException("Invalid protein sequence");
-
-	if (sstart != 0) {
-	  seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send));
-	} else {
-	  seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length()));
-	}
-      }
-
-  }
-
-  public static String print(SequenceI[] s) {
-    return print(s,72);
-  }
-  public static String print(SequenceI[] s, int len) {
-    return print(s,len,true);
-  }
-
-  public static String print(SequenceI[] s, int len,boolean gaps) {
-    return print(s,len,gaps,true);
-  }
-
-  public static String print(SequenceI[] s, int len,boolean gaps, boolean displayId) {
-    StringBuffer out = new StringBuffer();
-    int i = 0;
-    while (i < s.length && s[i] != null) {
-      String seq = "";
-      if (gaps) {
-        seq = s[i].getSequence();
-      } else {
-        seq = AlignSeq.extractGaps("-. ",s[i].getSequence());
-      }
-      // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
-      out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName())+"\n");
-
-      int nochunks = seq.length() / len + 1;
-
-      for (int j = 0; j < nochunks; j++) {
-        int start = j*len;
-        int end = start + len;
-
-        if (end < seq.length()) {
-          out.append(seq.substring(start,end) + "\n");
-        } else if (start < seq.length()) {
-          out.append(seq.substring(start) + "\n");
+            i++;
         }
-      }
-      i++;
+
+        return out.toString();
     }
-    return out.toString();
-  }
 
-  public String print() {
-    return print(getSeqsAsArray());
-  }
+    public String print() { // Instance form: passes getSeqsAsArray() to the static print(SequenceI[]) overload.
+        return print(getSeqsAsArray());
+    }
 }
-
-
-