X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=d8890afe8909831decb9e7f68bf011c2db726073;hb=b2f9a8d7bce642ff4011bc6d49e02bb0569fbb11;hp=757bb57dbaac990bcf6fb3b16d476e338d4941bf;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git
diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java
index 757bb57..d8890af 100755
--- a/src/jalview/io/FastaFile.java
+++ b/src/jalview/io/FastaFile.java
@@ -1,289 +1,238 @@
-/*
-* Jalview - A Sequence Alignment Editor and Viewer
-* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License
-* as published by the Free Software Foundation; either version 2
-* of the License, or (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-*/
-package jalview.io;
-
-import jalview.analysis.*;
-
-import jalview.datamodel.*;
-
-import java.io.*;
-
-import java.util.*;
-
-
-/**
- * DOCUMENT ME!
- *
- * @author $author$
- * @version $Revision$
- */
-public class FastaFile extends AlignFile
-{
- /**
- * Creates a new FastaFile object.
- */
- public FastaFile()
- {
- }
-
- /**
- * Creates a new FastaFile object.
- *
- * @param inStr DOCUMENT ME!
- */
- public FastaFile(String inStr)
- {
- super(inStr);
- }
-
- /**
- * Creates a new FastaFile object.
- *
- * @param inFile DOCUMENT ME!
- * @param type DOCUMENT ME!
- *
- * @throws IOException DOCUMENT ME!
- */
- public FastaFile(String inFile, String type) throws IOException
- {
- super(inFile, type);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @throws IOException DOCUMENT ME!
- */
- public void parse() throws IOException
- {
- String id = "";
- StringBuffer seq = new StringBuffer();
- int count = 0;
-
- int sstart = 0;
- int send = 0;
-
- String line;
-
- while ((line = nextLine()) != null)
- {
- if (line.length() > 0)
- {
- // Do we have an id line?
- // JBPNote - this code needs to be standardised to EBI/whatever for the
- // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)
- if (line.substring(0, 1).equals(">"))
- {
- if (count != 0)
- {
- if (sstart != 0)
- {
- seqs.addElement(new Sequence(id, seq.toString(),
- sstart, send));
- }
- else
- {
- seqs.addElement(new Sequence(id, seq.toString(), 1,
- seq.length()));
- }
- }
-
- count++;
-
- StringTokenizer str = new StringTokenizer(line, " ");
-
- id = str.nextToken();
- id = id.substring(1);
-
- com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(
- "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");
-
- // JBPNote At the moment - we don't get rid of the friendly names but this
- // behaviour is probably wrong in the long run.
- if (dbId.search(id))
- {
- String dbid = dbId.stringMatched(1);
- String idname = dbId.stringMatched(2);
-
- if ((idname.length() > 0) &&
- (idname.indexOf("_") > -1))
- {
- id = idname; // use the friendly name - apparently no dbid
- }
- else if (dbid.length() > 1)
- {
- id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise
- }
- }
-
- if (id.indexOf("/") > 0)
- {
- StringTokenizer st = new StringTokenizer(id, "/");
-
- if (st.countTokens() == 2)
- {
- id = st.nextToken();
-
- String tmp = st.nextToken();
-
- st = new StringTokenizer(tmp, "-");
-
- if (st.countTokens() == 2)
- {
- sstart = Integer.valueOf(st.nextToken())
- .intValue();
- send = Integer.valueOf(st.nextToken()).intValue();
- }
- }
- }
-
- seq = new StringBuffer();
- }
- else
- {
- seq = seq.append(line);
- }
- }
- }
-
- if (count > 0)
- {
- if (!isValidProteinSequence(seq.toString().toUpperCase()))
- {
- throw new IOException("Invalid protein sequence");
- }
-
- if (sstart != 0)
- {
- seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
- sstart, send));
- }
- else
- {
- seqs.addElement(new Sequence(id, seq.toString().toUpperCase(),
- 1, seq.length()));
- }
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param s DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public static String print(SequenceI[] s)
- {
- return print(s, 72);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param s DOCUMENT ME!
- * @param len DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public static String print(SequenceI[] s, int len)
- {
- return print(s, len, true);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param s DOCUMENT ME!
- * @param len DOCUMENT ME!
- * @param gaps DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public static String print(SequenceI[] s, int len, boolean gaps)
- {
- return print(s, len, gaps, true);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param s DOCUMENT ME!
- * @param len DOCUMENT ME!
- * @param gaps DOCUMENT ME!
- * @param displayId DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public static String print(SequenceI[] s, int len, boolean gaps,
- boolean displayId)
- {
- StringBuffer out = new StringBuffer();
- int i = 0;
-
- while ((i < s.length) && (s[i] != null))
- {
- String seq = "";
-
- if (gaps)
- {
- seq = s[i].getSequence();
- }
- else
- {
- seq = AlignSeq.extractGaps("-. ", s[i].getSequence());
- }
-
- // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() +
- out.append(">" +
- ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n");
-
- int nochunks = (seq.length() / len) + 1;
-
- for (int j = 0; j < nochunks; j++)
- {
- int start = j * len;
- int end = start + len;
-
- if (end < seq.length())
- {
- out.append(seq.substring(start, end) + "\n");
- }
- else if (start < seq.length())
- {
- out.append(seq.substring(start) + "\n");
- }
- }
-
- i++;
- }
-
- return out.toString();
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public String print()
- {
- return print(getSeqsAsArray());
- }
-}
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1)
+ * Copyright (C) 2014 The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import java.io.*;
+
+import jalview.datamodel.*;
+
+/**
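+ * Reader and writer for FASTA-style sequence text: parse() builds sequences
+ * (and ">#_" annotation rows) from '>'-headed records, print() renders them.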
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class FastaFile extends AlignFile
+{
+ /**
+ * Length of a sequence line: the print code below splits each sequence into
+ * chunks of at most this many characters, one chunk per output line.
+ */
+ int len = 72;
+
+ StringBuffer out; // print code below appends to this and returns out.toString(); presumably (re)created per print call - TODO confirm
+
+ /**
+ * Creates a new FastaFile object. The body is empty, so any set-up is
+ * whatever the superclass no-argument constructor performs.
+ */
+ public FastaFile()
+ {
+ }
+
+ /**
+ * Creates a new FastaFile object; both arguments are handed unchanged to the
+ * two-argument superclass constructor.
+ *
+ * @param inFile
+ * passed to the superclass; presumably the file name or data to read - TODO confirm
+ * @param type
+ * passed to the superclass; presumably says how inFile should be accessed - TODO confirm
+ *
+ * @throws IOException
+ * declared because the superclass constructor call may throw it
+ */
+ public FastaFile(String inFile, String type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Creates a new FastaFile object from a FileParse; the argument is handed
+ * unchanged to the superclass constructor.
+ *
+ * @param source
+ * the FileParse passed to the superclass
+ *
+ * @throws IOException
+ * declared because the superclass constructor call may throw it
+ */
+ public FastaFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
+
+  /**
+   * Parses the input as FASTA-style records.
+   *
+   * Lines are read with nextLine() until it returns null; lines that are
+   * empty after trimming are skipped. A line starting with '>' opens a new
+   * record whose Sequence is built by parseId() from the text after the '>'.
+   * Every other line is appended to the body of the current record. A header
+   * starting with ">#_" marks an annotation record: its body is converted
+   * with makeAnnotation() and stored in annotations instead of seqs.
+   *
+   * Body lines that appear before the first header are discarded.
+   *
+   * @throws IOException
+   *           presumably propagated from nextLine() - TODO confirm
+   */
+  public void parse() throws IOException
+  {
+    StringBuffer sb = new StringBuffer();
+    boolean firstLine = true;
+
+    String line, uline;
+    Sequence seq = null;
+
+    // true while the record being collected was opened by a ">#_" header
+    boolean annotation = false;
+
+    while ((uline = nextLine()) != null)
+    {
+      line = uline.trim();
+      if (line.length() > 0)
+      {
+        if (line.charAt(0) == '>')
+        {
+          // Store the record that this header terminates BEFORE touching the
+          // annotation flag, so the record is filed according to its own
+          // header. Resetting the flag first made an annotation record that
+          // was followed by an ordinary '>' record end up in seqs and never
+          // reach annotations.
+          if (!firstLine)
+          {
+            if (annotation)
+            {
+              annotations.addElement(makeAnnotation(seq, sb));
+            }
+
+            seq.setSequence(sb.toString());
+
+            if (!annotation)
+            {
+              seqs.addElement(seq);
+            }
+          }
+
+          seq = parseId(line.substring(1));
+          firstLine = false;
+
+          sb = new StringBuffer();
+
+          // from here on the flag describes the record just opened
+          annotation = line.startsWith(">#_");
+        }
+        else
+        {
+          // annotation bodies keep the untrimmed line; presumably so that
+          // blanks still line up with alignment columns - TODO confirm
+          sb.append(annotation ? uline : line);
+        }
+      }
+    }
+
+    // store the final record, which no later header terminates
+    if (annotation)
+    {
+      annotations.addElement(makeAnnotation(seq, sb));
+    }
+    else if (!firstLine)
+    {
+      seq.setSequence(sb.toString());
+      seqs.addElement(seq);
+    }
+  }
+ private AlignmentAnnotation makeAnnotation(SequenceI seq, StringBuffer sb)
+ {
+ Annotation[] anots = new Annotation[sb.length()];
+ char cb;
+ for (int i=0;i" + printId(s[i]));
+ if (s[i].getDescription() != null)
+ {
+ out.append(" " + s[i].getDescription());
+ }
+
+ out.append(newline);
+
+ int nochunks = (s[i].getLength() / len) + 1;
+
+ for (int j = 0; j < nochunks; j++)
+ {
+ int start = j * len;
+ int end = start + len;
+
+ if (end < s[i].getLength())
+ {
+ out.append(s[i].getSequenceAsString(start, end) + newline);
+ }
+ else if (start < s[i].getLength())
+ {
+ out.append(s[i].getSequenceAsString(start, s[i].getLength())
+ + newline);
+ }
+ }
+
+ i++;
+ }
+
+ return out.toString();
+ }
+
+ /**
+ * Prints the sequences held by this object: the array returned by
+ * getSeqsAsArray() is passed to the print overload that takes that array.
+ *
+ * @return the String returned by that print call
+ */
+ public String print()
+ {
+ return print(getSeqsAsArray());
+ }
+}