Annotation/feature exporter
[jalview.git] / src / jalview / io / FeaturesFile.java
1 /*\r
2 * Jalview - A Sequence Alignment Editor and Viewer\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
4 *\r
5 * This program is free software; you can redistribute it and/or\r
6 * modify it under the terms of the GNU General Public License\r
7 * as published by the Free Software Foundation; either version 2\r
8 * of the License, or (at your option) any later version.\r
9 *\r
10 * This program is distributed in the hope that it will be useful,\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 * GNU General Public License for more details.\r
14 *\r
15 * You should have received a copy of the GNU General Public License\r
16 * along with this program; if not, write to the Free Software\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA\r
18 */\r
19 package jalview.io;\r
20 \r
21 import jalview.datamodel.*;\r
22 \r
23 import java.io.*;\r
24 import java.util.*;\r
25 import jalview.schemes.UserColourScheme;\r
26 \r
27 \r
28 /**\r
29  * DOCUMENT ME!\r
30  *\r
31  * @author $author$\r
32  * @version $Revision$\r
33  */\r
34 public class FeaturesFile extends AlignFile\r
35 {\r
36     /**\r
37      * Creates a new FeaturesFile object.\r
38      */\r
39     public FeaturesFile()\r
40     {\r
41     }\r
42 \r
43     /**\r
44      * Creates a new FeaturesFile object.\r
45      *\r
46      * @param inStr DOCUMENT ME!\r
47      */\r
48     public FeaturesFile(String inStr)\r
49     {\r
50         super(inStr);\r
51     }\r
52 \r
53     /**\r
54      * Creates a new FeaturesFile object.\r
55      *\r
56      * @param inFile DOCUMENT ME!\r
57      * @param type DOCUMENT ME!\r
58      *\r
59      * @throws IOException DOCUMENT ME!\r
60      */\r
61     public FeaturesFile(String inFile, String type)\r
62        throws IOException\r
63     {\r
64         super(inFile, type);\r
65     }\r
66 \r
67     /**\r
68      * DOCUMENT ME!\r
69      *\r
70      * @throws IOException DOCUMENT ME!\r
71      */\r
72     public boolean parse(AlignmentI align,  Hashtable colours)\r
73         throws IOException\r
74     {\r
75       String line = null;\r
76       try\r
77       {\r
78         SequenceI seq = null;\r
79         String type, desc, token=null;\r
80 \r
81         int index, start, end;\r
82         float score;\r
83         StringTokenizer st;\r
84         SequenceFeature sf;\r
85         String featureGroup = null;\r
86 \r
87         boolean GFFFile = true;\r
88 \r
89         while ( (line = nextLine()) != null)\r
90         {\r
91           if(line.startsWith("#"))\r
92             continue;\r
93 \r
94           st = new StringTokenizer(line, "\t");\r
95           if (st.countTokens()>1 && st.countTokens() < 4 )\r
96           {\r
97             GFFFile = false;\r
98             type = st.nextToken();\r
99             if (type.equalsIgnoreCase("startgroup"))\r
100             {\r
101               featureGroup = st.nextToken();\r
102             }\r
103             else if (type.equalsIgnoreCase("endgroup"))\r
104             {\r
105               //We should check whether this is the current group,\r
106               //but at present theres no way of showing more than 1 group\r
107               st.nextToken();\r
108               featureGroup = null;\r
109             }\r
110             else\r
111             {\r
112               UserColourScheme ucs = new UserColourScheme(st.nextToken());\r
113               colours.put(type, ucs.findColour("A"));\r
114             }\r
115             continue;\r
116           }\r
117 \r
118           while (st.hasMoreElements())\r
119           {\r
120 \r
121             if(GFFFile)\r
122             {\r
123               // Still possible this is an old Jalview file,\r
124               // which does not have type colours at the beginning\r
125               token = st.nextToken();\r
126               seq = align.findName(token);\r
127               if(seq != null)\r
128               {\r
129                 desc = st.nextToken();\r
130                 type = st.nextToken();\r
131                 start = Integer.parseInt(st.nextToken());\r
132                 end = Integer.parseInt(st.nextToken());\r
133                 try\r
134                 {\r
135                   score = Float.parseFloat(st.nextToken());\r
136                 }\r
137                 catch (NumberFormatException ex)\r
138                 {\r
139                   score = 0;\r
140                 }\r
141 \r
142                 sf = new SequenceFeature(type, desc, start, end, score, null);\r
143 \r
144                 try\r
145                 {\r
146                   sf.setValue("STRAND", st.nextToken());\r
147                   sf.setValue("FRAME", st.nextToken());\r
148                 }\r
149                 catch (Exception ex)\r
150                 {}\r
151 \r
152                 seq.getDatasetSequence().addSequenceFeature(sf);\r
153 \r
154                 break;\r
155               }\r
156             }\r
157 \r
158             if(GFFFile && seq==null)\r
159             {\r
160               desc = token;\r
161             }\r
162             else\r
163               desc = st.nextToken();\r
164 \r
165 \r
166             token = st.nextToken();\r
167             if (!token.equals("ID_NOT_SPECIFIED"))\r
168             {\r
169               seq = align.findName(token);\r
170               st.nextToken();\r
171             }\r
172             else\r
173             {\r
174               try{\r
175                 index = Integer.parseInt(st.nextToken());\r
176                 seq = align.getSequenceAt(index);\r
177               }\r
178               catch(NumberFormatException ex)\r
179               {\r
180                 seq = null;\r
181               }\r
182             }\r
183 \r
184             if(seq==null)\r
185             {\r
186               System.out.println("Sequence not found: "+line);\r
187               break;\r
188             }\r
189 \r
190             start = Integer.parseInt(st.nextToken());\r
191             end = Integer.parseInt(st.nextToken());\r
192 \r
193             type = st.nextToken();\r
194 \r
195             if (!colours.containsKey(type))\r
196             {\r
197               // Probably the old style groups file\r
198               UserColourScheme ucs = new UserColourScheme(type);\r
199               colours.put(type, ucs.findColour("A"));\r
200             }\r
201 \r
202             sf = new SequenceFeature(type, desc, "", start, end, featureGroup);\r
203 \r
204             seq.getDatasetSequence().addSequenceFeature(sf);\r
205 \r
206             //If we got here, its not a GFFFile\r
207             GFFFile = false;\r
208           }\r
209         }\r
210       }\r
211       catch (Exception ex)\r
212       {\r
213         System.out.println(line);\r
214         ex.printStackTrace();\r
215         System.out.println("Error parsing groups file: " + ex +"\n"+line);\r
216         return false;\r
217       }\r
218 \r
219       return true;\r
220 \r
221     }\r
222 \r
223 \r
224     /**\r
225      * DOCUMENT ME!\r
226      *\r
227      * @param s DOCUMENT ME!\r
228      * @param len DOCUMENT ME!\r
229      * @param gaps DOCUMENT ME!\r
230      * @param displayId DOCUMENT ME!\r
231      *\r
232      * @return DOCUMENT ME!\r
233      */\r
234     public String printJalviewFormat(SequenceI [] seqs,\r
235                                      Hashtable visible)\r
236     {\r
237         StringBuffer out = new StringBuffer();\r
238         SequenceFeature [] next;\r
239 \r
240         if(visible==null || visible.size()<1)\r
241           return "No Features Visible";\r
242 \r
243         Enumeration en = visible.keys();\r
244         String type;\r
245         int color;\r
246         while( en.hasMoreElements() )\r
247         {\r
248           type = en.nextElement().toString();\r
249           color = Integer.parseInt( visible.get(type).toString() );\r
250           out.append(type + "\t"\r
251                      + jalview.util.Format.getHexString(\r
252                          new java.awt.Color(color)  )\r
253                      +"\n");\r
254         }\r
255 \r
256         //Work out which groups are both present and visible\r
257         Vector groups = new Vector();\r
258         int groupIndex = 0;\r
259 \r
260         for(int i=0; i<seqs.length; i++)\r
261         {\r
262           next = seqs[i].getSequenceFeatures();\r
263           if(next!=null)\r
264           {\r
265             for(int j=0; j<next.length; j++)\r
266             {\r
267               if (!visible.containsKey(next[j].type))\r
268                 continue;\r
269 \r
270               if (    next[j].featureGroup != null\r
271                   && !groups.contains(next[j].featureGroup))\r
272                 groups.addElement(next[j].featureGroup);\r
273             }\r
274           }\r
275         }\r
276 \r
277         String group = null;\r
278 \r
279         do\r
280         {\r
281           if (groups.size() > 0)\r
282           {\r
283             group = groups.elementAt(groupIndex).toString();\r
284             out.append("\nSTARTGROUP\t" + group + "\n");\r
285           }\r
286 \r
287 \r
288           for (int i = 0; i < seqs.length; i++)\r
289           {\r
290             next = seqs[i].getSequenceFeatures();\r
291             if (next != null)\r
292             {\r
293               for (int j = 0; j < next.length; j++)\r
294               {\r
295                 if (!visible.containsKey(next[j].type))\r
296                   continue;\r
297 \r
298                 if (group != null && !next[j].featureGroup.equals(group))\r
299                   continue;\r
300 \r
301                 if(next[j].description==null || next[j].description.equals(""))\r
302                   out.append(next[j].type+"\t");\r
303                 else\r
304                   out.append(next[j].description + "\t");\r
305 \r
306                 out.append(  seqs[i].getName() + "\t-1\t"\r
307                            + next[j].begin + "\t"\r
308                            + next[j].end + "\t"\r
309                            + next[j].type + "\n"\r
310                     );\r
311               }\r
312             }\r
313           }\r
314 \r
315           if(groups.size()>0)\r
316           {\r
317             out.append("ENDGROUP\t"+group+"\n");\r
318           }\r
319 \r
320           groupIndex++;\r
321         }\r
322         while(groupIndex < groups.size());\r
323 \r
324 \r
325       return out.toString();\r
326     }\r
327 \r
328     public String printGFFFormat(SequenceI [] seqs, Hashtable visible)\r
329     {\r
330       StringBuffer out = new StringBuffer();\r
331       SequenceFeature [] next;\r
332 \r
333       for(int i=0; i<seqs.length; i++)\r
334       {\r
335         if(seqs[i].getSequenceFeatures()!=null)\r
336         {\r
337           next = seqs[i].getSequenceFeatures();\r
338           for(int j=0; j<next.length; j++)\r
339           {\r
340             if(!visible.containsKey(next[j].type))\r
341               continue;\r
342 \r
343             out.append(seqs[i].getName() + "\t"\r
344                        + next[j].description + "\t"\r
345                        + next[j].type  + "\t"\r
346                        + next[j].begin + "\t"\r
347                        + next[j].end   + "\t"\r
348                        + next[j].score + "\t"\r
349                       );\r
350 \r
351             if(next[j].getValue("STRAND")!=null)\r
352               out.append(next[j].getValue("STRAND")+"\t");\r
353             else\r
354               out.append(".\t");\r
355             if(next[j].getValue("FRAME")!=null)\r
356               out.append(next[j].getValue("FRAME")+"\n");\r
357             else\r
358               out.append(".\n");\r
359 \r
360           }\r
361         }\r
362       }\r
363 \r
364       return out.toString();\r
365     }\r
366 \r
367     public void parse()\r
368     {\r
369       //IGNORED\r
370     }\r
371 \r
372     /**\r
373      * DOCUMENT ME!\r
374      *\r
375      * @return DOCUMENT ME!\r
376      */\r
377     public String print()\r
378     {\r
379         return "USE printGFFFormat() or printJalviewFormat()";\r
380     }\r
381 }\r