X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=f257c6c2cb1d6ce4a025191ac7662f9f5a5d940f;hb=811f68c1dc1e4c409751b785203ed0a7289529b3;hp=adfa96711bdde05cf30160b74933c1b84f8a11e5;hpb=ea7f74cc42770ce2f0dbb6745cb221eabf98e181;p=jalview.git diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index adfa967..f257c6c 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,159 +18,137 @@ */ package jalview.io; -import jalview.analysis.*; - import jalview.datamodel.*; import java.io.*; -import java.util.*; - - -public class FastaFile extends AlignFile { - public FastaFile() { - } - public FastaFile(String inStr) { - super(inStr); +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ +public class FastaFile extends AlignFile +{ + /** + * Creates a new FastaFile object. + */ + public FastaFile() + { } - public FastaFile(String inFile, String type) throws IOException { + /** + * Creates a new FastaFile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public FastaFile(String inFile, String type) throws IOException + { super(inFile, type); } - public void parse() throws IOException { - String id = ""; - StringBuffer seq = new StringBuffer(); + /** + * DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public void parse() throws IOException + { + StringBuffer sb = new StringBuffer(); int count = 0; - boolean flag = false; - - int sstart = 0; - int send = 0; String line; - - while ((line = nextLine()) != null) { - if (line.length() > 0) { - // Do we have an id line? - // JBPNote - this code needs to be standardised to EBI/whatever for the - // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist) - - if (line.substring(0, 1).equals(">")) { - if (count != 0) { - if (sstart != 0) { - seqs.addElement(new Sequence(id, - seq.toString().toUpperCase(), sstart, send)); - } else { - seqs.addElement(new Sequence(id, - seq.toString().toUpperCase(), 1, - seq.length())); - } + Sequence seq = null; + + while ((line = nextLine()) != null) + { + line = line.trim(); + if (line.length() > 0) + { + if (line.charAt(0)=='>') + { + if (count != 0) + { + if (!isValidProteinSequence(sb.toString().toCharArray())) + { + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); + } + + seq.setSequence(sb.toString()); + seqs.addElement(seq); } - count++; - - StringTokenizer str = new StringTokenizer(line, " "); - - id = str.nextToken(); - id = id.substring(1); - - com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex( - "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)"); - // JBPNote At the moment - we don't get rid of the friendly names but this - // behaviour is probably wrong in the long run. - if (dbId.search(id)) { - String dbid = dbId.stringMatched(1); - String idname = dbId.stringMatched(2); - if ( (idname.length() > 0) && - (idname.indexOf("_") > -1)) { - id = idname; // use the friendly name - apparently no dbid - } else - if (dbid.length()>1) { - id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise - } - } - - if (id.indexOf("/") > 0) { - StringTokenizer st = new StringTokenizer(id, "/"); - - if (st.countTokens() == 2) { - id = st.nextToken(); - - String tmp = st.nextToken(); - - st = new StringTokenizer(tmp, "-"); + seq = parseId(line.substring(1)); - if (st.countTokens() == 2) { - sstart = Integer.valueOf(st.nextToken()) - .intValue(); - send = Integer.valueOf(st.nextToken()).intValue(); - } - } - } - - seq = new StringBuffer(); - } else { - seq = seq.append(line); + count++; + sb = new StringBuffer(); + } + else + { + sb.append(line); } } } - if (count > 0) { - if (!isValidProteinSequence(seq.toString().toUpperCase())) { - throw new IOException("Invalid protein sequence"); + if (count > 0) + { + if (!isValidProteinSequence(sb.toString().toCharArray())) + { + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); } - if (sstart != 0) { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - sstart, send)); - } else { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - 1, seq.length())); - } + seq.setSequence(sb.toString()); + seqs.addElement(seq); } } - public static String print(SequenceI[] s) { - return print(s, 72); - } - public static String print(SequenceI[] s, int len) { - return print(s, len, true); - } - - public static String print(SequenceI[] s, int len, boolean gaps) { - return print(s, len, gaps, true); - } - - public static String print(SequenceI[] s, int len, boolean gaps, - boolean displayId) { + /** + * DOCUMENT ME! + * + * @param s DOCUMENT ME! + * @param len DOCUMENT ME! + * @param gaps DOCUMENT ME! + * @param displayId DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print(SequenceI[] s) + { + int len = 72; StringBuffer out = new StringBuffer(); int i = 0; - while ((i < s.length) && (s[i] != null)) { - String seq = ""; + while ((i < s.length) && (s[i] != null)) + { + out.append(">" + printId(s[i])); + if(s[i].getDescription()!=null) + out.append(" "+s[i].getDescription()); - if (gaps) { - seq = s[i].getSequence(); - } else { - seq = AlignSeq.extractGaps("-. ", s[i].getSequence()); - } - - // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() + - out.append(">" + - ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n"); + out.append("\n"); - int nochunks = (seq.length() / len) + 1; + int nochunks = (s[i].getLength() / len) + 1; - for (int j = 0; j < nochunks; j++) { + for (int j = 0; j < nochunks; j++) + { int start = j * len; int end = start + len; - if (end < seq.length()) { - out.append(seq.substring(start, end) + "\n"); - } else if (start < seq.length()) { - out.append(seq.substring(start) + "\n"); + if (end < s[i].getLength()) + { + out.append(s[i].getSequenceAsString(start, end) + "\n"); + } + else if (start < s[i].getLength()) + { + out.append(s[i].getSequenceAsString(start, s[i].getLength()) + "\n"); } } @@ -180,7 +158,13 @@ public class FastaFile extends AlignFile { return out.toString(); } - public String print() { + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String print() + { return print(getSeqsAsArray()); } }