description may be null
[jalview.git] / src / jalview / io / FeaturesFile.java
1 /*\r
2 * Jalview - A Sequence Alignment Editor and Viewer\r
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
4 *\r
5 * This program is free software; you can redistribute it and/or\r
6 * modify it under the terms of the GNU General Public License\r
7 * as published by the Free Software Foundation; either version 2\r
8 * of the License, or (at your option) any later version.\r
9 *\r
10 * This program is distributed in the hope that it will be useful,\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 * GNU General Public License for more details.\r
14 *\r
15 * You should have received a copy of the GNU General Public License\r
16 * along with this program; if not, write to the Free Software\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA\r
18 */\r
19 package jalview.io;\r
20 \r
21 import jalview.datamodel.*;\r
22 \r
23 import java.io.*;\r
24 import java.util.*;\r
25 import jalview.schemes.UserColourScheme;\r
26 \r
27 \r
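/**
 * Reads sequence feature files - Jalview features format or GFF - onto the
 * sequences of an alignment, and writes sequence features back out in
 * either format (see printJalviewFormat and printGFFFormat).
 *
 * @author $author$
 * @version $Revision$
 */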
34 public class FeaturesFile extends AlignFile\r
35 {\r
36     /**\r
37      * Creates a new FeaturesFile object.\r
38      */\r
39     public FeaturesFile()\r
40     {\r
41     }\r
42 \r
43 \r
44     /**\r
45      * Creates a new FeaturesFile object.\r
46      *\r
47      * @param inFile DOCUMENT ME!\r
48      * @param type DOCUMENT ME!\r
49      *\r
50      * @throws IOException DOCUMENT ME!\r
51      */\r
52     public FeaturesFile(String inFile, String type)\r
53        throws IOException\r
54     {\r
55         super(inFile, type);\r
56     }\r
57 \r
58     /**\r
59      * The Application can render HTML, but the applet will\r
60      * remove HTML tags and replace links with %LINK%\r
61      * Both need to read links in HTML however\r
62      *\r
63      * @throws IOException DOCUMENT ME!\r
64      */\r
65     public boolean parse(AlignmentI align,\r
66                          Hashtable colours,\r
67                          boolean removeHTML)\r
68     {\r
69       return parse(align, colours, null, removeHTML);\r
70     }\r
71     /**\r
72      * The Application can render HTML, but the applet will\r
73      * remove HTML tags and replace links with %LINK%\r
74      * Both need to read links in HTML however\r
75      *\r
76      * @throws IOException DOCUMENT ME!\r
77      */\r
78     public boolean parse(AlignmentI align,\r
79                          Hashtable colours,\r
80                          Hashtable featureLink,\r
81                          boolean removeHTML)\r
82     {\r
83       String line = null;\r
84       try\r
85       {\r
86         SequenceI seq = null;\r
87         String type, desc, token=null;\r
88 \r
89         int index, start, end;\r
90         float score;\r
91         StringTokenizer st;\r
92         SequenceFeature sf;\r
93         String featureGroup = null, groupLink = null;\r
94         Hashtable typeLink = new Hashtable();\r
95 \r
96         boolean GFFFile = true;\r
97 \r
98         while ( (line = nextLine()) != null)\r
99         {\r
100           if(line.startsWith("#"))\r
101             continue;\r
102 \r
103           st = new StringTokenizer(line, "\t");\r
104           if (st.countTokens()>1 && st.countTokens() < 4 )\r
105           {\r
106             GFFFile = false;\r
107             type = st.nextToken();\r
108             if (type.equalsIgnoreCase("startgroup"))\r
109             {\r
110               featureGroup = st.nextToken();\r
111               if (st.hasMoreElements())\r
112               {\r
113                 groupLink = st.nextToken();\r
114                 featureLink.put(featureGroup, groupLink);\r
115               }\r
116             }\r
117             else if (type.equalsIgnoreCase("endgroup"))\r
118             {\r
119               //We should check whether this is the current group,\r
120               //but at present theres no way of showing more than 1 group\r
121               st.nextToken();\r
122               featureGroup = null;\r
123               groupLink = null;\r
124             }\r
125             else\r
126             {\r
127               UserColourScheme ucs = new UserColourScheme(st.nextToken());\r
128               colours.put(type, ucs.findColour('A'));\r
129               if (st.hasMoreElements())\r
130               {\r
131                 String link = st.nextToken();\r
132                 typeLink.put(type, link);\r
133                 if(featureLink==null)\r
134                   featureLink = new Hashtable();\r
135                 featureLink.put(type, link);\r
136               }\r
137 \r
138             }\r
139             continue;\r
140           }\r
141 \r
142           while (st.hasMoreElements())\r
143           {\r
144 \r
145             if(GFFFile)\r
146             {\r
147               // Still possible this is an old Jalview file,\r
148               // which does not have type colours at the beginning\r
149               token = st.nextToken();\r
150               seq = align.findName(token);\r
151               if(seq != null)\r
152               {\r
153                 desc = st.nextToken();\r
154                 type = st.nextToken();\r
155                 start = Integer.parseInt(st.nextToken());\r
156                 end = Integer.parseInt(st.nextToken());\r
157                 try\r
158                 {\r
159                   score =  new Float(st.nextToken()).floatValue();\r
160                 }\r
161                 catch (NumberFormatException ex)\r
162                 {\r
163                   score = 0;\r
164                 }\r
165 \r
166                 sf = new SequenceFeature(type, desc, start, end, score, null);\r
167 \r
168                 try\r
169                 {\r
170                   sf.setValue("STRAND", st.nextToken());\r
171                   sf.setValue("FRAME", st.nextToken());\r
172                 }\r
173                 catch (Exception ex)\r
174                 {}\r
175 \r
176                 if(st.hasMoreTokens())\r
177                 {\r
178                   StringBuffer attributes = new StringBuffer();\r
179                   while (st.hasMoreTokens())\r
180                   {\r
181                     attributes.append("\t"+st.nextElement());\r
182                   }\r
183                   sf.setValue("ATTRIBUTES", attributes.toString());\r
184                 }\r
185 \r
186                 seq.addSequenceFeature(sf);\r
187 \r
188                 break;\r
189               }\r
190             }\r
191 \r
192             if(GFFFile && seq==null)\r
193             {\r
194               desc = token;\r
195             }\r
196             else\r
197               desc = st.nextToken();\r
198 \r
199 \r
200             token = st.nextToken();\r
201             if (!token.equals("ID_NOT_SPECIFIED"))\r
202             {\r
203               seq = align.findName(token);\r
204               st.nextToken();\r
205             }\r
206             else\r
207             {\r
208               try{\r
209                 index = Integer.parseInt(st.nextToken());\r
210                 seq = align.getSequenceAt(index);\r
211               }\r
212               catch(NumberFormatException ex)\r
213               {\r
214                 seq = null;\r
215               }\r
216             }\r
217 \r
218             if(seq==null)\r
219             {\r
220               System.out.println("Sequence not found: "+line);\r
221               break;\r
222             }\r
223 \r
224             start = Integer.parseInt(st.nextToken());\r
225             end = Integer.parseInt(st.nextToken());\r
226 \r
227             type = st.nextToken();\r
228 \r
229             if (!colours.containsKey(type))\r
230             {\r
231               // Probably the old style groups file\r
232               UserColourScheme ucs = new UserColourScheme(type);\r
233               colours.put(type, ucs.findColour('A'));\r
234             }\r
235 \r
236             sf = new SequenceFeature(type, desc, "", start, end, featureGroup);\r
237 \r
238             seq.addSequenceFeature(sf);\r
239 \r
240             if(groupLink!=null && removeHTML)\r
241             {\r
242               sf.addLink(groupLink);\r
243               sf.description += "%LINK%";\r
244             }\r
245             if(typeLink.containsKey(type) && removeHTML)\r
246             {\r
247               sf.addLink(typeLink.get(type).toString());\r
248               sf.description += "%LINK%";\r
249             }\r
250 \r
251             parseDescriptionHTML(sf, removeHTML);\r
252 \r
253             //If we got here, its not a GFFFile\r
254             GFFFile = false;\r
255           }\r
256         }\r
257       }\r
258       catch (Exception ex)\r
259       {\r
260         System.out.println(line);\r
261         System.out.println("Error parsing feature file: " + ex +"\n"+line);\r
262         return false;\r
263       }\r
264 \r
265       return true;\r
266     }\r
267 \r
268     public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
269     {\r
270       if(sf.getDescription()==null)\r
271         return;\r
272 \r
273       if(removeHTML && sf.getDescription().toUpperCase().indexOf("<HTML>")==-1)\r
274         removeHTML = false;\r
275 \r
276       StringBuffer sb = new StringBuffer();\r
277       StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
278       String token,  link;\r
279       int startTag;\r
280       String tag=null;\r
281       while(st.hasMoreElements())\r
282       {\r
283         token = st.nextToken("&>");\r
284         if(token.equalsIgnoreCase("html") || token.startsWith("/"))\r
285         {\r
286           continue;\r
287         }\r
288 \r
289         tag = null;\r
290         startTag = token.indexOf("<");\r
291 \r
292         if (startTag > -1)\r
293         {\r
294           tag = token.substring(startTag+1);\r
295           token = token.substring(0, startTag);\r
296         }\r
297 \r
298         if (tag != null && tag.toUpperCase().startsWith("A HREF="))\r
299         {\r
300           if(token.length()>0)\r
301             sb.append(token);\r
302           link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);\r
303           String label = st.nextToken("<>");\r
304           sf.addLink(label + "|" + link);\r
305           sb.append(label + "%LINK%");\r
306         }\r
307         else if (tag != null && tag.equalsIgnoreCase("br"))\r
308           sb.append("\n");\r
309         else if (token.startsWith("lt;"))\r
310           sb.append("<" + token.substring(3));\r
311         else if (token.startsWith("gt;"))\r
312           sb.append(">" + token.substring(3));\r
313         else if (token.startsWith("amp;"))\r
314           sb.append("&" + token.substring(4));\r
315         else\r
316         {\r
317           sb.append(token);\r
318         }\r
319       }\r
320 \r
321       if(removeHTML)\r
322         sf.description = sb.toString();\r
323 \r
324   }\r
325 \r
326     /**\r
327      * DOCUMENT ME!\r
328      *\r
329      * @param s DOCUMENT ME!\r
330      * @param len DOCUMENT ME!\r
331      * @param gaps DOCUMENT ME!\r
332      * @param displayId DOCUMENT ME!\r
333      *\r
334      * @return DOCUMENT ME!\r
335      */\r
336     public String printJalviewFormat(SequenceI [] seqs,\r
337                                      Hashtable visible)\r
338     {\r
339         StringBuffer out = new StringBuffer();\r
340         SequenceFeature [] next;\r
341 \r
342         if(visible==null || visible.size()<1)\r
343           return "No Features Visible";\r
344 \r
345         Enumeration en = visible.keys();\r
346         String type;\r
347         int color;\r
348         while( en.hasMoreElements() )\r
349         {\r
350           type = en.nextElement().toString();\r
351           color = Integer.parseInt( visible.get(type).toString() );\r
352           out.append(type + "\t"\r
353                      + jalview.util.Format.getHexString(\r
354                          new java.awt.Color(color)  )\r
355                      +"\n");\r
356         }\r
357 \r
358         //Work out which groups are both present and visible\r
359         Vector groups = new Vector();\r
360         int groupIndex = 0;\r
361 \r
362         for(int i=0; i<seqs.length; i++)\r
363         {\r
364           next = seqs[i].getSequenceFeatures();\r
365           if(next!=null)\r
366           {\r
367             for(int j=0; j<next.length; j++)\r
368             {\r
369               if (!visible.containsKey(next[j].type))\r
370                 continue;\r
371 \r
372               if (    next[j].featureGroup != null\r
373                   && !groups.contains(next[j].featureGroup))\r
374                 groups.addElement(next[j].featureGroup);\r
375             }\r
376           }\r
377         }\r
378 \r
379         String group = null;\r
380 \r
381         do\r
382         {\r
383 \r
384 \r
385           if (groups.size() > 0 && groupIndex < groups.size())\r
386           {\r
387             group = groups.elementAt(groupIndex).toString();\r
388             out.append("\nSTARTGROUP\t" + group + "\n");\r
389           }\r
390           else\r
391             group = null;\r
392 \r
393           for (int i = 0; i < seqs.length; i++)\r
394           {\r
395             next = seqs[i].getSequenceFeatures();\r
396             if (next != null)\r
397             {\r
398               for (int j = 0; j < next.length; j++)\r
399               {\r
400                 if (!visible.containsKey(next[j].type))\r
401                   continue;\r
402 \r
403                 if (group != null\r
404                     && (next[j].featureGroup==null\r
405                         || !next[j].featureGroup.equals(group))\r
406                   )\r
407                   continue;\r
408 \r
409                 if(group==null && next[j].featureGroup!=null)\r
410                   continue;\r
411 \r
412                 if(next[j].description==null || next[j].description.equals(""))\r
413                   out.append(next[j].type+"\t");\r
414                 else\r
415                 {\r
416                   if(next[j].links!=null\r
417                      && next[j].getDescription().indexOf("<html>")==-1)\r
418                     out.append("<html>");\r
419 \r
420                   out.append(next[j].description+" ");\r
421                   if(next[j].links!=null)\r
422                     {\r
423                       for(int l=0; l<next[j].links.size(); l++)\r
424                       {\r
425                         String label = next[j].links.elementAt(l).toString();\r
426                         String href = label.substring(label.indexOf("|")+1);\r
427                         label = label.substring(0, label.indexOf("|"));\r
428 \r
429                         if(next[j].description.indexOf(href)==-1)\r
430                         {\r
431                           out.append("<a href=\""\r
432                                      + href\r
433                                      + "\">"\r
434                                      + label\r
435                                      + "</a>");\r
436                         }\r
437                       }\r
438 \r
439                       if (next[j].getDescription().indexOf("</html>") == -1)\r
440                         out.append("</html>");\r
441                     }\r
442 \r
443 \r
444                    out.append("\t");\r
445                 }\r
446 \r
447 \r
448                 out.append(  seqs[i].getName() + "\t-1\t"\r
449                            + next[j].begin + "\t"\r
450                            + next[j].end + "\t"\r
451                            + next[j].type + "\n"\r
452                     );\r
453               }\r
454             }\r
455           }\r
456 \r
457           if(group!=null)\r
458           {\r
459             out.append("ENDGROUP\t"+group+"\n");\r
460             groupIndex++;\r
461           }\r
462           else\r
463             break;\r
464 \r
465         }\r
466         while(groupIndex < groups.size()+1);\r
467 \r
468 \r
469       return out.toString();\r
470     }\r
471 \r
472     public String printGFFFormat(SequenceI [] seqs, Hashtable visible)\r
473     {\r
474       StringBuffer out = new StringBuffer();\r
475       SequenceFeature [] next;\r
476       String source;\r
477 \r
478       for(int i=0; i<seqs.length; i++)\r
479       {\r
480         if(seqs[i].getSequenceFeatures()!=null)\r
481         {\r
482           next = seqs[i].getSequenceFeatures();\r
483           for(int j=0; j<next.length; j++)\r
484           {\r
485             if(!visible.containsKey(next[j].type))\r
486               continue;\r
487 \r
488             source = next[j].featureGroup;\r
489             if(source==null)\r
490               source = next[j].getDescription();\r
491 \r
492             out.append(seqs[i].getName() + "\t"\r
493                        + source + "\t"\r
494                        + next[j].type  + "\t"\r
495                        + next[j].begin + "\t"\r
496                        + next[j].end   + "\t"\r
497                        + next[j].score + "\t"\r
498                       );\r
499 \r
500             if(next[j].getValue("STRAND")!=null)\r
501               out.append(next[j].getValue("STRAND")+"\t");\r
502             else\r
503               out.append(".\t");\r
504 \r
505             if(next[j].getValue("FRAME")!=null)\r
506               out.append(next[j].getValue("FRAME"));\r
507             else\r
508               out.append(".");\r
509 \r
510             if(next[j].getValue("ATTRIBUTES")!=null)\r
511               out.append(next[j].getValue("ATTRIBUTES"));\r
512 \r
513             out.append("\n");\r
514 \r
515           }\r
516         }\r
517       }\r
518 \r
519       return out.toString();\r
520     }\r
521 \r
522     public void parse()\r
523     {\r
524       //IGNORED\r
525     }\r
526 \r
527     /**\r
528      * DOCUMENT ME!\r
529      *\r
530      * @return DOCUMENT ME!\r
531      */\r
532     public String print()\r
533     {\r
534         return "USE printGFFFormat() or printJalviewFormat()";\r
535     }\r
536 }\r