X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFastaFile.java;h=6d218f434515fa862b5ec3356704d2ae9f97e301;hb=424a0faf266f649090f17dc65246cdfb4111217e;hp=757bb57dbaac990bcf6fb3b16d476e338d4941bf;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/io/FastaFile.java b/src/jalview/io/FastaFile.java index 757bb57..6d218f4 100755 --- a/src/jalview/io/FastaFile.java +++ b/src/jalview/io/FastaFile.java @@ -18,14 +18,10 @@ */ package jalview.io; -import jalview.analysis.*; - import jalview.datamodel.*; import java.io.*; -import java.util.*; - /** * DOCUMENT ME! @@ -72,154 +68,58 @@ public class FastaFile extends AlignFile */ public void parse() throws IOException { - String id = ""; - StringBuffer seq = new StringBuffer(); + StringBuffer sb = new StringBuffer(); int count = 0; - int sstart = 0; - int send = 0; - String line; + Sequence seq = null; while ((line = nextLine()) != null) { + line = line.trim(); if (line.length() > 0) { - // Do we have an id line? - // JBPNote - this code needs to be standardised to EBI/whatever for the - // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist) - if (line.substring(0, 1).equals(">")) + if (line.charAt(0)=='>') { if (count != 0) { - if (sstart != 0) - { - seqs.addElement(new Sequence(id, seq.toString(), - sstart, send)); - } - else - { - seqs.addElement(new Sequence(id, seq.toString(), 1, - seq.length())); - } + if (!isValidProteinSequence(sb.toString())) + { + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); + } + + seq.setSequence(sb.toString()); + seqs.addElement(seq); } - count++; - - StringTokenizer str = new StringTokenizer(line, " "); - - id = str.nextToken(); - id = id.substring(1); - - com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex( - "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)"); - - // JBPNote At the moment - we don't get rid of the friendly names but this - // behaviour is probably wrong in the long run. - if (dbId.search(id)) - { - String dbid = dbId.stringMatched(1); - String idname = dbId.stringMatched(2); - - if ((idname.length() > 0) && - (idname.indexOf("_") > -1)) - { - id = idname; // use the friendly name - apparently no dbid - } - else if (dbid.length() > 1) - { - id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise - } - } - - if (id.indexOf("/") > 0) - { - StringTokenizer st = new StringTokenizer(id, "/"); - - if (st.countTokens() == 2) - { - id = st.nextToken(); - - String tmp = st.nextToken(); + seq = parseId(line.substring(1)); - st = new StringTokenizer(tmp, "-"); - - if (st.countTokens() == 2) - { - sstart = Integer.valueOf(st.nextToken()) - .intValue(); - send = Integer.valueOf(st.nextToken()).intValue(); - } - } - } - - seq = new StringBuffer(); + count++; + sb = new StringBuffer(); } else { - seq = seq.append(line); + sb.append(line); } } } if (count > 0) { - if (!isValidProteinSequence(seq.toString().toUpperCase())) + if (!isValidProteinSequence(sb.toString())) { - throw new IOException("Invalid protein sequence"); + throw new IOException(AppletFormatAdapter.INVALID_CHARACTERS + +" : "+seq.getName() + +" : "+invalidCharacter); } - if (sstart != 0) - { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - sstart, send)); - } - else - { - seqs.addElement(new Sequence(id, seq.toString().toUpperCase(), - 1, seq.length())); - } + seq.setSequence(sb.toString()); + seqs.addElement(seq); } } - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s) - { - return print(s, 72); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s, int len) - { - return print(s, len, true); - } - - /** - * DOCUMENT ME! - * - * @param s DOCUMENT ME! - * @param len DOCUMENT ME! - * @param gaps DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public static String print(SequenceI[] s, int len, boolean gaps) - { - return print(s, len, gaps, true); - } /** * DOCUMENT ME! @@ -231,43 +131,34 @@ public class FastaFile extends AlignFile * * @return DOCUMENT ME! */ - public static String print(SequenceI[] s, int len, boolean gaps, - boolean displayId) + public String print(SequenceI[] s) { + int len = 72; StringBuffer out = new StringBuffer(); int i = 0; while ((i < s.length) && (s[i] != null)) { - String seq = ""; - - if (gaps) - { - seq = s[i].getSequence(); - } - else - { - seq = AlignSeq.extractGaps("-. ", s[i].getSequence()); - } + out.append(">" + printId(s[i])); + if(s[i].getDescription()!=null) + out.append(" "+s[i].getDescription()); - // used to always put this here: + "/" + s[i].getStart() + "-" + s[i].getEnd() + - out.append(">" + - ((displayId) ? s[i].getDisplayId() : s[i].getName()) + "\n"); + out.append("\n"); - int nochunks = (seq.length() / len) + 1; + int nochunks = (s[i].getLength() / len) + 1; for (int j = 0; j < nochunks; j++) { int start = j * len; int end = start + len; - if (end < seq.length()) + if (end < s[i].getLength()) { - out.append(seq.substring(start, end) + "\n"); + out.append(s[i].getSequence(start, end) + "\n"); } - else if (start < seq.length()) + else if (start < s[i].getLength()) { - out.append(seq.substring(start) + "\n"); + out.append(s[i].getSequence(start, s[i].getLength()) + "\n"); } }