package jalview.io; import jalview.datamodel.*; import jalview.util.*; import java.io.*; import java.util.*; public class ClustalFile extends AlignFile { Vector ids; public ClustalFile() {} public ClustalFile(String inStr) { super(inStr); } public void initData() { super.initData(); ids = new Vector(); } public ClustalFile(String inFile, String type) throws IOException { super(inFile,type); } public void parse() { int i = 0; boolean flag = false; Vector headers = new Vector(); Hashtable seqhash = new Hashtable(); String line; try { while ((line = nextLine()) != null) { if (line.indexOf(" ") != 0) { StringTokenizer str = new StringTokenizer(line," "); String id = ""; if (str.hasMoreTokens()) { id = str.nextToken(); if (id.equals("CLUSTAL")) { flag = true; } else { if (flag) { StringBuffer tempseq; if (seqhash.containsKey(id)) { tempseq = (StringBuffer)seqhash.get(id); } else { tempseq = new StringBuffer(); seqhash.put(id,tempseq); } if (!(headers.contains(id))) { headers.addElement(id); } tempseq.append(str.nextToken()); } } } } } } catch (IOException e) { System.err.println("Exception parsing clustal file " + e); e.stacktrace(); } if (flag) { this.noSeqs = headers.size(); //Add sequences to the hash for (i = 0; i < headers.size(); i++ ) { int start = -1; int end = -1; if ( seqhash.get(headers.elementAt(i)) != null) { if (maxLength < seqhash.get(headers.elementAt(i)).toString().length() ) { maxLength = seqhash.get(headers.elementAt(i)).toString().length(); } String head = headers.elementAt(i).toString(); start = 1; end = seqhash.get(headers.elementAt(i)).toString().length(); if (head.indexOf("/") > 0 ) { StringTokenizer st = new StringTokenizer(head,"/"); if (st.countTokens() == 2) { ids.addElement(st.nextToken()); String tmp = st.nextToken(); st = new StringTokenizer(tmp,"-"); if (st.countTokens() == 2) { start = Integer.valueOf(st.nextToken()).intValue(); end = Integer.valueOf(st.nextToken()).intValue(); } } else { ids.addElement(headers.elementAt(i)); } } else { ids.addElement(headers.elementAt(i)); } Sequence newSeq = new Sequence(ids.elementAt(i).toString(), seqhash.get(headers.elementAt(i).toString()).toString(),start,end); seqs.addElement(newSeq); } else { System.err.println("Clustal File Reader: Can't find sequence for " + headers.elementAt(i)); } } } } public String print() { return print(getSeqsAsArray()); } public static String print(SequenceI[] s) { StringBuffer out = new StringBuffer("CLUSTAL\n\n"); int max = 0; int maxid = 0; int i = 0; while (i < s.length && s[i] != null) { String tmp = s[i].getName() + "/" + s[i].getStart() + "-" + s[i].getEnd(); if (s[i].getSequence().length() > max) { max = s[i].getSequence().length(); } if (tmp.length() > maxid) { maxid = tmp.length(); } i++; } if (maxid < 15) { maxid = 15; } maxid++; int len = 60; int nochunks = max / len + 1; for (i = 0; i < nochunks; i++) { int j = 0; while ( j < s.length && s[j] != null) { out.append( new Format("%-" + maxid + "s").form(s[j].getName() + "/" + s[j].getStart() + "-" + s[j].getEnd()) + " "); int start = i*len; int end = start + len; if (end < s[j].getSequence().length() && start < s[j].getSequence().length() ) { out.append(s[j].getSequence().substring(start,end) + "\n"); } else { if (start < s[j].getSequence().length()) { out.append(s[j].getSequence().substring(start) + "\n"); } } j++; } out.append("\n"); } return out.toString(); } }