X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=6e3ced88635f906442f3107133741b70732f9214;hb=174230b4233d9ce80f94527768d2cd2f76da11ab;hp=cd287ffb9c4a3c227c3ac1f66416a7d10cf3430d;hpb=1470c861a3f837c5206837a89d0b932d2207fcc8;p=jalview.git diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index cd287ff..6e3ced8 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -1,156 +1,170 @@ +/* +* Jalview - A Sequence Alignment Editor and Viewer +* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ package jalview.io; import jalview.datamodel.*; -import jalview.analysis.*; import java.io.*; -import java.util.*; -public class FastaFile extends AlignFile { - public FastaFile() - {} +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class FastaFile extends AlignFile +{ + /** + * Creates a new FastaFile object. + */ + public FastaFile() + { + } - public FastaFile(String inStr) { - super(inStr); - } + /** + * Creates a new FastaFile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public FastaFile(String inFile, String type) throws IOException + { + super(inFile, type); + } - public FastaFile(String inFile, String type) throws IOException { - super(inFile,type); - } + /** + * DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public void parse() throws IOException + { + StringBuffer sb = new StringBuffer(); + int count = 0; + + String line; + Sequence seq = null; + + while ((line = nextLine()) != null) + { + line = line.trim(); + if (line.length() > 0) + { + if (line.charAt(0)=='>') + { + if (count != 0) + { + if (!isValidProteinSequence(sb.toString())) + { + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); + } + + seq.setSequence(sb.toString()); + seqs.addElement(seq); + } + + seq = parseId(line.substring(1)); + + count++; + sb = new StringBuffer(); + } + else + { + sb.append(line); + } + } + } - public void parse() throws IOException - { - - String id = ""; - StringBuffer seq = new StringBuffer(); - int count = 0; - boolean flag = false; - - int sstart = 0; - int send = 0; - - String line; - - while ((line = nextLine()) != null) { - - if (line.length() > 0) { - - // Do we have an id line? - - if (line.substring(0,1).equals(">")) { - - if (count != 0) { - if (sstart != 0) { - seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send)); - } else { - seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length())); - } - } - - count++; - - StringTokenizer str = new StringTokenizer(line," "); - - id = str.nextToken(); - id = id.substring(1); - com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex("[A-Za-z-]+/[A-Za-z-]+\\|(\\w+)\\|(.+)"); - if (dbId.search(id)) - { - String dbid = dbId.stringMatched(1); - String idname = dbId.stringMatched(2); - if (idname.length()>0 && idname.indexOf("_") > -1) - { - id = idname; // just use friendly name // JBPNote: we may lose uniprot standardised ID here. - } - else - { - id = dbid; // use dbid to ensure sensible queries - } - - } - if (id.indexOf("/") > 0 ) { - - StringTokenizer st = new StringTokenizer(id,"/"); - if (st.countTokens() == 2) { - id = st.nextToken(); - String tmp = st.nextToken(); - - st = new StringTokenizer(tmp,"-"); - - if (st.countTokens() == 2) { - sstart = Integer.valueOf(st.nextToken()).intValue(); - send = Integer.valueOf(st.nextToken()).intValue(); - } - } - } - - seq = new StringBuffer(); - - } else { - seq = seq.append(line); - } - } - } - if (count > 0) { - - if(!isValidProteinSequence(seq.toString().toUpperCase())) - throw new IOException("Invalid protein sequence"); - - if (sstart != 0) { - seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),sstart,send)); - } else { - seqs.addElement(new Sequence(id,seq.toString().toUpperCase(),1,seq.length())); - } - } - - } - - public static String print(SequenceI[] s) { - return print(s,72); - } - public static String print(SequenceI[] s, int len) { - return print(s,len,true); - } - - public static String print(SequenceI[] s, int len,boolean gaps) { - return print(s,len,gaps,true); - } - - public static String print(SequenceI[] s, int len,boolean gaps, boolean displayId) { - StringBuffer out = new StringBuffer(); - int i = 0; - while (i < s.length && s[i] != null) { - String seq = ""; - if (gaps) { - seq = s[i].getSequence(); - } else { - seq = AlignSeq.extractGaps("-. ",s[i].getSequence()); - } - // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() + - out.append(">" + ((displayId) ? s[i].getDisplayId() : s[i].getName())+"\n"); - - int nochunks = seq.length() / len + 1; - - for (int j = 0; j < nochunks; j++) { - int start = j*len; - int end = start + len; - - if (end < seq.length()) { - out.append(seq.substring(start,end) + "\n"); - } else if (start < seq.length()) { - out.append(seq.substring(start) + "\n"); + if (count > 0) + { + if (!isValidProteinSequence(sb.toString())) + { + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); + } + + seq.setSequence(sb.toString()); + seqs.addElement(seq); } - } - i++; } - return out.toString(); - } - public String print() { - return print(getSeqsAsArray()); - } -} + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * @param len DOCUMENT ME! + * @param gaps DOCUMENT ME! + * @param displayId DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print(SequenceI[] s) + { + int len = 72; + StringBuffer out = new StringBuffer(); + int i = 0; + + while ((i < s.length) && (s[i] != null)) + { + out.append(">" + printId(s[i])); + if(s[i].getDescription()!=null) + out.append(" "+s[i].getDescription()); + + out.append("\n"); + + int nochunks = (s[i].getLength() / len) + 1; + + for (int j = 0; j < nochunks; j++) + { + int start = j * len; + int end = start + len; + + if (end < s[i].getLength()) + { + out.append(s[i].getSequence(start, end) + "\n"); + } + else if (start < s[i].getLength()) + { + out.append(s[i].getSequence(start, s[i].getLength()) + "\n"); + } + } + + i++; + } + return out.toString(); + } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print() + { + return print(getSeqsAsArray()); + } +}