3 import jalview.datamodel.*;
\r
4 import jalview.util.*;
\r
9 public class PfamFile extends AlignFile {
\r
16 public PfamFile(String inStr) {
\r
20 public void initData() {
\r
25 public PfamFile(String inFile, String type) throws IOException {
\r
29 public void parse() throws IOException{
\r
34 Hashtable seqhash = new Hashtable();
\r
35 Vector headers = new Vector();
\r
38 while ((line = nextLine()) != null)
\r
41 if (line.indexOf(" ") != 0)
\r
43 if (line.indexOf("#") != 0)
\r
46 StringTokenizer str = new StringTokenizer(line," ");
\r
49 if (str.hasMoreTokens())
\r
51 id = str.nextToken();
\r
53 StringBuffer tempseq;
\r
55 if (seqhash.containsKey(id))
\r
56 tempseq = (StringBuffer)seqhash.get(id);
\r
59 tempseq = new StringBuffer();
\r
60 seqhash.put(id,tempseq);
\r
63 if (!(headers.contains(id)))
\r
64 headers.addElement(id);
\r
67 tempseq.append(str.nextToken());
\r
73 this.noSeqs = headers.size();
\r
75 throw new IOException("No sequences found (PFAM input)");
\r
77 for (i = 0; i < headers.size(); i++ ) {
\r
79 if ( seqhash.get(headers.elementAt(i)) != null) {
\r
80 if (maxLength < seqhash.get(headers.elementAt(i)).toString().length() )
\r
81 maxLength = seqhash.get(headers.elementAt(i)).toString().length();
\r
83 String head = headers.elementAt(i).toString();
\r
85 int end = seqhash.get(headers.elementAt(i)).toString().length();
\r
87 if (head.indexOf("/") > 0 ) {
\r
88 StringTokenizer st = new StringTokenizer(head,"/");
\r
89 if (st.countTokens() == 2) {
\r
90 ids.addElement(st.nextToken());
\r
91 String tmp = st.nextToken();
\r
92 st = new StringTokenizer(tmp,"-");
\r
93 if (st.countTokens() == 2) {
\r
94 start = Integer.valueOf(st.nextToken()).intValue();
\r
95 end = Integer.valueOf(st.nextToken()).intValue();
\r
102 ids.addElement(headers.elementAt(i));
\r
106 ids.addElement(headers.elementAt(i));
\r
109 Sequence newSeq = null;
\r
110 if (start != -1 && end != -1)
\r
112 newSeq = new Sequence(ids.elementAt(i).toString(),
\r
113 seqhash.get(headers.elementAt(i).toString()).toString(),start,end);
\r
114 seqs.addElement(newSeq);
\r
118 newSeq = new Sequence(ids.elementAt(i).toString(),
\r
119 seqhash.get(headers.elementAt(i).toString()).toString(),1,
\r
120 seqhash.get(headers.elementAt(i).toString()).toString().length());
\r
121 seqs.addElement(newSeq);
\r
124 if(!isValidProteinSequence(newSeq.getSequence()))
\r
125 throw new IOException("Not a valid protein sequence - (PFAM input)");
\r
128 System.out.println("Can't find sequence for " + headers.elementAt(i));
\r
134 public static String print(SequenceI[] s) {
\r
135 StringBuffer out = new StringBuffer("");
\r
142 while (i < s.length && s[i] != null) {
\r
143 String tmp = s[i].getName() + "/" + s[i].getStart()+ "-" + s[i].getEnd();
\r
145 if (s[i].getSequence().length() > max) {
\r
146 max = s[i].getSequence().length();
\r
148 if (tmp.length() > maxid) {
\r
149 maxid = tmp.length();
\r
159 while ( j < s.length && s[j] != null) {
\r
160 out.append( new Format("%-" + maxid + "s").form(s[j].getName() + "/" + s[j].getStart() + "-" + s[j].getEnd() ) + " ");
\r
162 out.append(s[j].getSequence() + "\n");
\r
167 return out.toString();
\r
170 public String print() {
\r
171 return print(getSeqsAsArray());
\r