Parse HTML links from jalview features file
[jalview.git] / src / jalview / io / FeaturesFile.java
1 /*\r
2 * Jalview - A Sequence Alignment Editor and Viewer\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
4 *\r
5 * This program is free software; you can redistribute it and/or\r
6 * modify it under the terms of the GNU General Public License\r
7 * as published by the Free Software Foundation; either version 2\r
8 * of the License, or (at your option) any later version.\r
9 *\r
10 * This program is distributed in the hope that it will be useful,\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 * GNU General Public License for more details.\r
14 *\r
15 * You should have received a copy of the GNU General Public License\r
16 * along with this program; if not, write to the Free Software\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA\r
18 */\r
19 package jalview.io;\r
20 \r
21 import jalview.datamodel.*;\r
22 \r
23 import java.io.*;\r
24 import java.util.*;\r
25 import jalview.schemes.UserColourScheme;\r
26 \r
27 \r
28 /**\r
29  * DOCUMENT ME!\r
30  *\r
31  * @author $author$\r
32  * @version $Revision$\r
33  */\r
34 public class FeaturesFile extends AlignFile\r
35 {\r
36     /**\r
37      * Creates a new FeaturesFile object.\r
38      */\r
39     public FeaturesFile()\r
40     {\r
41     }\r
42 \r
43     /**\r
44      * Creates a new FeaturesFile object.\r
45      *\r
46      * @param inStr DOCUMENT ME!\r
47      */\r
48     public FeaturesFile(String inStr)\r
49     {\r
50         super(inStr);\r
51     }\r
52 \r
53     /**\r
54      * Creates a new FeaturesFile object.\r
55      *\r
56      * @param inFile DOCUMENT ME!\r
57      * @param type DOCUMENT ME!\r
58      *\r
59      * @throws IOException DOCUMENT ME!\r
60      */\r
61     public FeaturesFile(String inFile, String type)\r
62        throws IOException\r
63     {\r
64         super(inFile, type);\r
65     }\r
66 \r
67     /**\r
68      * The Application can render HTML, but the applet will\r
69      * remove HTML tags and replace links with %LINK%\r
70      * Both need to read links in HTML however\r
71      *\r
72      * @throws IOException DOCUMENT ME!\r
73      */\r
74     public boolean parse(AlignmentI align,\r
75                          Hashtable colours,\r
76                          boolean removeHTML)\r
77     {\r
78       return parse(align, colours, null, removeHTML);\r
79     }\r
80     /**\r
81      * The Application can render HTML, but the applet will\r
82      * remove HTML tags and replace links with %LINK%\r
83      * Both need to read links in HTML however\r
84      *\r
85      * @throws IOException DOCUMENT ME!\r
86      */\r
87     public boolean parse(AlignmentI align,\r
88                          Hashtable colours,\r
89                          Hashtable featureLink,\r
90                          boolean removeHTML)\r
91     {\r
92       String line = null;\r
93       try\r
94       {\r
95         SequenceI seq = null;\r
96         String type, desc, token=null;\r
97 \r
98         int index, start, end;\r
99         float score;\r
100         StringTokenizer st;\r
101         SequenceFeature sf;\r
102         String featureGroup = null, groupLink = null;\r
103         Hashtable typeLink = new Hashtable();\r
104 \r
105         boolean GFFFile = true;\r
106 \r
107         while ( (line = nextLine()) != null)\r
108         {\r
109           if(line.startsWith("#"))\r
110             continue;\r
111 \r
112           st = new StringTokenizer(line, "\t");\r
113           if (st.countTokens()>1 && st.countTokens() < 4 )\r
114           {\r
115             GFFFile = false;\r
116             type = st.nextToken();\r
117             if (type.equalsIgnoreCase("startgroup"))\r
118             {\r
119               featureGroup = st.nextToken();\r
120               if (st.hasMoreElements())\r
121               {\r
122                 groupLink = st.nextToken();\r
123                 featureLink.put(featureGroup, groupLink);\r
124               }\r
125             }\r
126             else if (type.equalsIgnoreCase("endgroup"))\r
127             {\r
128               //We should check whether this is the current group,\r
129               //but at present theres no way of showing more than 1 group\r
130               st.nextToken();\r
131               featureGroup = null;\r
132               groupLink = null;\r
133             }\r
134             else\r
135             {\r
136               UserColourScheme ucs = new UserColourScheme(st.nextToken());\r
137               colours.put(type, ucs.findColour("A"));\r
138               if (st.hasMoreElements())\r
139               {\r
140                 String link = st.nextToken();\r
141                 typeLink.put(type, link);\r
142                 if(featureLink==null)\r
143                   featureLink = new Hashtable();\r
144                 featureLink.put(type, link);\r
145               }\r
146 \r
147             }\r
148             continue;\r
149           }\r
150 \r
151           while (st.hasMoreElements())\r
152           {\r
153 \r
154             if(GFFFile)\r
155             {\r
156               // Still possible this is an old Jalview file,\r
157               // which does not have type colours at the beginning\r
158               token = st.nextToken();\r
159               seq = align.findName(token);\r
160               if(seq != null)\r
161               {\r
162                 desc = st.nextToken();\r
163                 type = st.nextToken();\r
164                 start = Integer.parseInt(st.nextToken());\r
165                 end = Integer.parseInt(st.nextToken());\r
166                 try\r
167                 {\r
168                   score = Float.parseFloat(st.nextToken());\r
169                 }\r
170                 catch (NumberFormatException ex)\r
171                 {\r
172                   score = 0;\r
173                 }\r
174 \r
175                 sf = new SequenceFeature(type, desc, start, end, score, null);\r
176 \r
177                 try\r
178                 {\r
179                   sf.setValue("STRAND", st.nextToken());\r
180                   sf.setValue("FRAME", st.nextToken());\r
181                 }\r
182                 catch (Exception ex)\r
183                 {}\r
184 \r
185                 seq.getDatasetSequence().addSequenceFeature(sf);\r
186 \r
187                 break;\r
188               }\r
189             }\r
190 \r
191             if(GFFFile && seq==null)\r
192             {\r
193               desc = token;\r
194             }\r
195             else\r
196               desc = st.nextToken();\r
197 \r
198 \r
199             token = st.nextToken();\r
200             if (!token.equals("ID_NOT_SPECIFIED"))\r
201             {\r
202               seq = align.findName(token);\r
203               st.nextToken();\r
204             }\r
205             else\r
206             {\r
207               try{\r
208                 index = Integer.parseInt(st.nextToken());\r
209                 seq = align.getSequenceAt(index);\r
210               }\r
211               catch(NumberFormatException ex)\r
212               {\r
213                 seq = null;\r
214               }\r
215             }\r
216 \r
217             if(seq==null)\r
218             {\r
219               System.out.println("Sequence not found: "+line);\r
220               break;\r
221             }\r
222 \r
223             start = Integer.parseInt(st.nextToken());\r
224             end = Integer.parseInt(st.nextToken());\r
225 \r
226             type = st.nextToken();\r
227 \r
228             if (!colours.containsKey(type))\r
229             {\r
230               // Probably the old style groups file\r
231               UserColourScheme ucs = new UserColourScheme(type);\r
232               colours.put(type, ucs.findColour("A"));\r
233             }\r
234 \r
235             sf = new SequenceFeature(type, desc, "", start, end, featureGroup);\r
236 \r
237             seq.addSequenceFeature(sf);\r
238 \r
239             if(groupLink!=null && removeHTML)\r
240             {\r
241               sf.addLink(groupLink);\r
242               sf.description += "%LINK%";\r
243             }\r
244             if(typeLink.containsKey(type) && removeHTML)\r
245             {\r
246               sf.addLink(typeLink.get(type).toString());\r
247               sf.description += "%LINK%";\r
248             }\r
249 \r
250             parseDescriptionHTML(sf, removeHTML);\r
251 \r
252             //If we got here, its not a GFFFile\r
253             GFFFile = false;\r
254           }\r
255         }\r
256       }\r
257       catch (Exception ex)\r
258       {\r
259         System.out.println(line);\r
260         ex.printStackTrace();\r
261         System.out.println("Error parsing feature file: " + ex +"\n"+line);\r
262         return false;\r
263       }\r
264 \r
265       return true;\r
266     }\r
267 \r
268     void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)\r
269     {\r
270       StringBuffer sb = new StringBuffer();\r
271       StringTokenizer st = new StringTokenizer(sf.getDescription(), "<");\r
272       String token,  link;\r
273       while(st.hasMoreElements())\r
274       {\r
275         token = st.nextToken("<>");\r
276         if(token.equalsIgnoreCase("html") || token.startsWith("/"))\r
277           continue;\r
278 \r
279         if(token.startsWith("a href="))\r
280         {\r
281           link = token.substring(token.indexOf("\"")+1, token.length()-1);\r
282           String label = st.nextToken("<>");\r
283           sf.addLink(label+"|"+link);\r
284           sb.append(label+"%LINK%");\r
285         }\r
286         else if(token.equalsIgnoreCase("br"))\r
287           sb.append("\n");\r
288         else\r
289           sb.append(token);\r
290       }\r
291 \r
292       if(removeHTML)\r
293         sf.description = sb.toString();\r
294   }\r
295 \r
296     /**\r
297      * DOCUMENT ME!\r
298      *\r
299      * @param s DOCUMENT ME!\r
300      * @param len DOCUMENT ME!\r
301      * @param gaps DOCUMENT ME!\r
302      * @param displayId DOCUMENT ME!\r
303      *\r
304      * @return DOCUMENT ME!\r
305      */\r
306     public String printJalviewFormat(SequenceI [] seqs,\r
307                                      Hashtable visible)\r
308     {\r
309         StringBuffer out = new StringBuffer();\r
310         SequenceFeature [] next;\r
311 \r
312         if(visible==null || visible.size()<1)\r
313           return "No Features Visible";\r
314 \r
315         Enumeration en = visible.keys();\r
316         String type;\r
317         int color;\r
318         while( en.hasMoreElements() )\r
319         {\r
320           type = en.nextElement().toString();\r
321           color = Integer.parseInt( visible.get(type).toString() );\r
322           out.append(type + "\t"\r
323                      + jalview.util.Format.getHexString(\r
324                          new java.awt.Color(color)  )\r
325                      +"\n");\r
326         }\r
327 \r
328         //Work out which groups are both present and visible\r
329         Vector groups = new Vector();\r
330         int groupIndex = 0;\r
331 \r
332         for(int i=0; i<seqs.length; i++)\r
333         {\r
334           next = seqs[i].getSequenceFeatures();\r
335           if(next!=null)\r
336           {\r
337             for(int j=0; j<next.length; j++)\r
338             {\r
339               if (!visible.containsKey(next[j].type))\r
340                 continue;\r
341 \r
342               if (    next[j].featureGroup != null\r
343                   && !groups.contains(next[j].featureGroup))\r
344                 groups.addElement(next[j].featureGroup);\r
345             }\r
346           }\r
347         }\r
348 \r
349         String group = null;\r
350 \r
351         do\r
352         {\r
353 \r
354 \r
355           if (groups.size() > 0 && groupIndex < groups.size())\r
356           {\r
357             group = groups.elementAt(groupIndex).toString();\r
358             out.append("\nSTARTGROUP\t" + group + "\n");\r
359           }\r
360           else\r
361             group = null;\r
362 \r
363           for (int i = 0; i < seqs.length; i++)\r
364           {\r
365             next = seqs[i].getSequenceFeatures();\r
366             if (next != null)\r
367             {\r
368               for (int j = 0; j < next.length; j++)\r
369               {\r
370                 if (!visible.containsKey(next[j].type))\r
371                   continue;\r
372 \r
373                 if (group != null\r
374                     && (next[j].featureGroup==null\r
375                         || !next[j].featureGroup.equals(group))\r
376                   )\r
377                   continue;\r
378 \r
379                 if(group==null && next[j].featureGroup!=null)\r
380                   continue;\r
381 \r
382                 if(next[j].description==null || next[j].description.equals(""))\r
383                   out.append(next[j].type+"\t");\r
384                 else\r
385                   out.append(next[j].description + "\t");\r
386 \r
387                 out.append(  seqs[i].getName() + "\t-1\t"\r
388                            + next[j].begin + "\t"\r
389                            + next[j].end + "\t"\r
390                            + next[j].type + "\n"\r
391                     );\r
392               }\r
393             }\r
394           }\r
395 \r
396           if(group!=null)\r
397           {\r
398             out.append("ENDGROUP\t"+group+"\n");\r
399             groupIndex++;\r
400           }\r
401           else\r
402             break;\r
403 \r
404         }\r
405         while(groupIndex < groups.size()+1);\r
406 \r
407 \r
408       return out.toString();\r
409     }\r
410 \r
411     public String printGFFFormat(SequenceI [] seqs, Hashtable visible)\r
412     {\r
413       StringBuffer out = new StringBuffer();\r
414       SequenceFeature [] next;\r
415 \r
416       for(int i=0; i<seqs.length; i++)\r
417       {\r
418         if(seqs[i].getSequenceFeatures()!=null)\r
419         {\r
420           next = seqs[i].getSequenceFeatures();\r
421           for(int j=0; j<next.length; j++)\r
422           {\r
423             if(!visible.containsKey(next[j].type))\r
424               continue;\r
425 \r
426             out.append(seqs[i].getName() + "\t"\r
427                        + next[j].description + "\t"\r
428                        + next[j].type  + "\t"\r
429                        + next[j].begin + "\t"\r
430                        + next[j].end   + "\t"\r
431                        + next[j].score + "\t"\r
432                       );\r
433 \r
434             if(next[j].getValue("STRAND")!=null)\r
435               out.append(next[j].getValue("STRAND")+"\t");\r
436             else\r
437               out.append(".\t");\r
438             if(next[j].getValue("FRAME")!=null)\r
439               out.append(next[j].getValue("FRAME")+"\n");\r
440             else\r
441               out.append(".\n");\r
442 \r
443           }\r
444         }\r
445       }\r
446 \r
447       return out.toString();\r
448     }\r
449 \r
450     public void parse()\r
451     {\r
452       //IGNORED\r
453     }\r
454 \r
455     /**\r
456      * DOCUMENT ME!\r
457      *\r
458      * @return DOCUMENT ME!\r
459      */\r
460     public String print()\r
461     {\r
462         return "USE printGFFFormat() or printJalviewFormat()";\r
463     }\r
464 }\r