Javadoc fixes
[jabaws.git] / datamodel / compbio / data / sequence / ClustalAlignmentUtil.java
index 5fce997..fc44c44 100644 (file)
@@ -24,9 +24,7 @@ import java.io.FileInputStream;
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.io.InputStreamReader;\r
-import java.io.OutputStream;\r
-import java.io.OutputStreamWriter;\r
-import java.io.PrintWriter;\r
+import java.io.Writer;\r
 import java.util.ArrayList;\r
 import java.util.Arrays;\r
 import java.util.HashMap;\r
@@ -40,251 +38,251 @@ import java.util.logging.Logger;
  * \r
  * @author Petr Troshin based on jimp class\r
  * \r
- *         Date September 2009\r
+ * @version 1.0 September 2009\r
  * \r
  */\r
 public final class ClustalAlignmentUtil {\r
 \r
-    private static final Logger log = Logger\r
-           .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
-\r
-    /**\r
-     * Dash char to be used as gap char in the alignments\r
-     */\r
-    public static final char gapchar = '-';\r
-\r
-    /*\r
-     * Number of spaces separating the name and the sequence\r
-     */\r
-    private static final String spacer = "      "; // 6 space characters\r
-    /*\r
-     * name length limit is 30 characters! 2.0.7 - 2.0.12 clustalw /* if name is\r
-     * longer than that it gets trimmed in the end\r
-     */\r
-    private static final int maxNameLength = 30; // Maximum name length\r
-    /*\r
-     * If all sequences names in the alignment is shorter than\r
-     * minNameHolderLength than spaces are added to complete the name up to\r
-     * minNameHolderLength\r
-     */\r
-    private static final int minNameHolderLength = 10; // Minimum number of\r
-\r
-    // TODO check whether clustal still loads data if length is 60!\r
-    private static final int oneLineAlignmentLength = 60; // this could in fact\r
-\r
-    // be 50\r
-\r
-    // for long names ~30 chars\r
-\r
-    /**\r
-     * Read Clustal formatted alignment. Limitations: Does not read consensus\r
-     * \r
-     * Sequence names as well as the sequences are not guaranteed to be unique!\r
-     * \r
-     * @throws {@link IOException}\r
-     * @throws {@link UnknownFileFormatException}\r
-     */\r
-    public static Alignment readClustalFile(InputStream instream)\r
-           throws IOException, UnknownFileFormatException {\r
-\r
-       boolean flag = false;\r
-\r
-       List<String> headers = new ArrayList<String>();\r
-       Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
-       FastaSequence[] seqs = null;\r
-\r
-       String line;\r
-\r
-       BufferedReader breader = new BufferedReader(new InputStreamReader(\r
-               instream));\r
-       while ((line = breader.readLine()) != null) {\r
-           if (line.indexOf(" ") != 0) {\r
-               java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
-               String id = "";\r
-\r
-               if (str.hasMoreTokens()) {\r
-                   id = str.nextToken();\r
-                   // PROBCONS output clustal formatted file with not mention\r
-                   // of CLUSTAL (:-))\r
-                   if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
-                       flag = true;\r
-                   } else {\r
-                       if (flag) {\r
-                           StringBuffer tempseq;\r
-                           if (seqhash.containsKey(id)) {\r
-                               tempseq = seqhash.get(id);\r
-                           } else {\r
-                               tempseq = new StringBuffer();\r
-                               seqhash.put(id, tempseq);\r
-                           }\r
-\r
-                           if (!(headers.contains(id))) {\r
-                               headers.add(id);\r
-                           }\r
-\r
-                           tempseq.append(str.nextToken());\r
+       private static final Logger log = Logger\r
+                       .getLogger(ClustalAlignmentUtil.class.getCanonicalName());\r
+\r
+       /**\r
+        * Dash char to be used as gap char in the alignments\r
+        */\r
+       public static final char gapchar = '-';\r
+\r
+       /*\r
+        * Number of spaces separating the name and the sequence\r
+        */\r
+       private static final String spacer = "      "; // 6 space characters\r
+       /*\r
+        * Name length limit is 30 characters (clustalw 2.0.7 - 2.0.12); if a name\r
+        * is longer than that, it gets trimmed at the end\r
+        */\r
+       private static final int maxNameLength = 30; // Maximum name length\r
+       /*\r
+        * If all sequence names in the alignment are shorter than\r
+        * minNameHolderLength, then spaces are added to pad the name up to\r
+        * minNameHolderLength\r
+        */\r
+       private static final int minNameHolderLength = 10; // Minimum number of\r
+\r
+       // TODO check whether clustal still loads data if length is 60!\r
+       private static final int oneLineAlignmentLength = 60; // this could in fact\r
+\r
+       // be 50\r
+\r
+       // for long names ~30 chars\r
+\r
+       /**\r
+        * Read Clustal formatted alignment. Limitations: Does not read consensus\r
+        * \r
+        * Sequence names as well as the sequences are not guaranteed to be unique!\r
+        * \r
+        * @throws IOException if the stream cannot be read\r
+        * @throws UnknownFileFormatException if no Clustal sequences are found in the input\r
+        */\r
+       public static Alignment readClustalFile(InputStream instream)\r
+                       throws IOException, UnknownFileFormatException {\r
+\r
+               boolean flag = false;\r
+\r
+               List<String> headers = new ArrayList<String>();\r
+               Map<String, StringBuffer> seqhash = new HashMap<String, StringBuffer>();\r
+               FastaSequence[] seqs = null;\r
+\r
+               String line;\r
+\r
+               BufferedReader breader = new BufferedReader(new InputStreamReader(\r
+                               instream));\r
+               while ((line = breader.readLine()) != null) {\r
+                       if (line.indexOf(" ") != 0) {\r
+                               java.util.StringTokenizer str = new StringTokenizer(line, " ");\r
+                               String id = "";\r
+\r
+                               if (str.hasMoreTokens()) {\r
+                                       id = str.nextToken();\r
+                                       // PROBCONS outputs a clustal formatted file with no mention\r
+                                       // of CLUSTAL (:-))\r
+                                       if (id.equals("CLUSTAL") || id.equals("PROBCONS")) {\r
+                                               flag = true;\r
+                                       } else {\r
+                                               if (flag) {\r
+                                                       StringBuffer tempseq;\r
+                                                       if (seqhash.containsKey(id)) {\r
+                                                               tempseq = seqhash.get(id);\r
+                                                       } else {\r
+                                                               tempseq = new StringBuffer();\r
+                                                               seqhash.put(id, tempseq);\r
+                                                       }\r
+\r
+                                                       if (!(headers.contains(id))) {\r
+                                                               headers.add(id);\r
+                                                       }\r
+\r
+                                                       tempseq.append(str.nextToken());\r
+                                               }\r
+                                       }\r
+                               }\r
                        }\r
-                   }\r
                }\r
-           }\r
-       }\r
-       breader.close();\r
+               breader.close();\r
 \r
-       // TODO improve this bit\r
-       if (flag) {\r
+               // TODO improve this bit\r
+               if (flag) {\r
 \r
-           // Add sequences to the hash\r
-           seqs = new FastaSequence[headers.size()];\r
-           for (int i = 0; i < headers.size(); i++) {\r
-               if (seqhash.get(headers.get(i)) != null) {\r
+                       // Build the sequence array from the hash, in header order\r
+                       seqs = new FastaSequence[headers.size()];\r
+                       for (int i = 0; i < headers.size(); i++) {\r
+                               if (seqhash.get(headers.get(i)) != null) {\r
 \r
-                   FastaSequence newSeq = new FastaSequence(headers.get(i),\r
-                           seqhash.get(headers.get(i)).toString());\r
+                                       FastaSequence newSeq = new FastaSequence(headers.get(i),\r
+                                                       seqhash.get(headers.get(i)).toString());\r
 \r
-                   seqs[i] = newSeq;\r
+                                       seqs[i] = newSeq;\r
 \r
-               } else {\r
-                   // should not happened\r
-                   throw new AssertionError(\r
-                           "Bizarreness! Can't find sequence for "\r
-                                   + headers.get(i));\r
+                               } else {\r
+                                       // should not happen\r
+                                       throw new AssertionError(\r
+                                                       "Bizarreness! Can't find sequence for "\r
+                                                                       + headers.get(i));\r
+                               }\r
+                       }\r
                }\r
-           }\r
-       }\r
-       if (seqs == null || seqs.length == 0) {\r
-           throw new UnknownFileFormatException(\r
-                   "Input does not appear to be a clustal file! ");\r
-       }\r
-       return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
-               Program.CLUSTAL, gapchar));\r
-    }\r
-\r
-    /**\r
-     * \r
-     * @param input\r
-     * @return true if the file is recognised as Clustal formatted alignment,\r
-     *         false otherwise\r
-     */\r
-    public static boolean isValidClustalFile(InputStream input) {\r
-       if (input == null) {\r
-           throw new NullPointerException("Input is expected!");\r
-       }\r
-       BufferedReader breader = new BufferedReader(\r
-               new InputStreamReader(input));\r
-       try {\r
-           if (input.available() < 10) {\r
-               return false;\r
-           }\r
-           // read first 10 lines to find "Clustal"\r
-           for (int i = 0; i < 10; i++) {\r
-               String line = breader.readLine();\r
-               if (line != null) {\r
-                   line = line.toUpperCase().trim();\r
-                   if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
-                       return true;\r
-                   }\r
+               if (seqs == null || seqs.length == 0) {\r
+                       throw new UnknownFileFormatException(\r
+                                       "Input does not appear to be a clustal file! ");\r
                }\r
-           }\r
-\r
-           breader.close();\r
-       } catch (IOException e) {\r
-           log.severe("Could not read from the stream! "\r
-                   + e.getLocalizedMessage() + e.getCause());\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, breader);\r
-       }\r
-       return false;\r
-    }\r
-\r
-    /**\r
-     * Write Clustal formatted alignment Limitations: does not record the\r
-     * consensus. Potential bug - records 60 chars length alignment where\r
-     * Clustal would have recorded 50 chars.\r
-     * \r
-     * @param outStream\r
-     * \r
-     * @param alignment\r
-     * @throws IOException\r
-     */\r
-    public static void writeClustalAlignment(final OutputStream outStream,\r
-           final Alignment alignment) throws IOException {\r
-       List<FastaSequence> seqs = alignment.getSequences();\r
-\r
-       PrintWriter out = new PrintWriter(new OutputStreamWriter(outStream));\r
-\r
-       out.write("CLUSTAL\n\n\n");\r
-\r
-       int max = 0;\r
-       int maxidLength = 0;\r
-\r
-       int i = 0;\r
-       // Find the longest sequence name\r
-       for (FastaSequence fs : seqs) {\r
-           String tmp = fs.getId();\r
-\r
-           if (fs.getSequence().length() > max) {\r
-               max = fs.getSequence().length();\r
-           }\r
-           if (tmp.length() > maxidLength) {\r
-               maxidLength = tmp.length();\r
-           }\r
-           i++;\r
-       }\r
-       if (maxidLength < minNameHolderLength) {\r
-           maxidLength = minNameHolderLength;\r
-       }\r
-       if (maxidLength > maxNameLength) {\r
-           maxidLength = 30; // the rest will be trimmed\r
+               return new Alignment(Arrays.asList(seqs), new AlignmentMetadata(\r
+                               Program.CLUSTAL, gapchar));\r
        }\r
 \r
-       int oneLineAlignmentLength = 60;\r
-       int nochunks = max / oneLineAlignmentLength + 1;\r
-\r
-       for (i = 0; i < nochunks; i++) {\r
-           int j = 0;\r
-           for (FastaSequence fs : seqs) {\r
-\r
-               String name = fs.getId();\r
-               // display at most 30 characters in the name, keep the names\r
-               // 6 spaces away from the alignment for longest sequence names,\r
-               // and more than this for shorter names\r
-               out.format("%-" + maxidLength + "s" + spacer,\r
-                       (name.length() > maxNameLength ? name.substring(0,\r
-                               maxidLength) : name));\r
-               int start = i * oneLineAlignmentLength;\r
-               int end = start + oneLineAlignmentLength;\r
-\r
-               if (end < fs.getSequence().length()\r
-                       && start < fs.getSequence().length()) {\r
-                   out.write(fs.getSequence().substring(start, end) + "\n");\r
-               } else {\r
-                   if (start < fs.getSequence().length()) {\r
-                       out.write(fs.getSequence().substring(start) + "\n");\r
-                   }\r
+       /**\r
+        * Please note that this method closes the input stream provided as a parameter.\r
+        * \r
+        * @param input the stream to check; must not be null\r
+        * @return true if the file is recognised as Clustal formatted alignment,\r
+        *         false otherwise\r
+        */\r
+       public static boolean isValidClustalFile(InputStream input) {\r
+               if (input == null) {\r
+                       throw new NullPointerException("Input is expected!");\r
                }\r
-               j++;\r
-           }\r
-           out.write("\n");\r
-       }\r
-       try {\r
-           out.close();\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, out);\r
+               BufferedReader breader = new BufferedReader(\r
+                               new InputStreamReader(input));\r
+               try {\r
+                       if (input.available() < 10) {\r
+                               return false;\r
+                       }\r
+                       // read first 10 lines to find "Clustal"\r
+                       for (int i = 0; i < 10; i++) {\r
+                               String line = breader.readLine();\r
+                               if (line != null) {\r
+                                       line = line.toUpperCase().trim();\r
+                                       if (line.contains("CLUSTAL") || line.contains("PROBCONS")) {\r
+                                               return true;\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+                       breader.close();\r
+               } catch (IOException e) {\r
+                       log.severe("Could not read from the stream! "\r
+                                       + e.getLocalizedMessage() + e.getCause());\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, breader);\r
+               }\r
+               return false;\r
        }\r
-    }\r
 \r
-    public static Alignment readClustalFile(File file)\r
-           throws UnknownFileFormatException, IOException {\r
-       if (file == null) {\r
-           throw new NullPointerException("File is expected!");\r
+       /**\r
+        * Write Clustal formatted alignment. Limitations: does not record the\r
+        * consensus. Potential bug - records 60 chars length alignment where\r
+        * Clustal would have recorded 50 chars.\r
+        * \r
+        * @param out the writer to write the alignment to; it is closed by this method\r
+        * \r
+        * @param alignment the alignment to write\r
+        * @throws IOException if writing to or closing the writer fails\r
+        */\r
+       public static void writeClustalAlignment(final Writer out,\r
+                       final Alignment alignment) throws IOException {\r
+               List<FastaSequence> seqs = alignment.getSequences();\r
+\r
+               out.write("CLUSTAL\n\n\n");\r
+\r
+               int max = 0;\r
+               int maxidLength = 0;\r
+\r
+               int i = 0;\r
+               // Find the longest sequence and the longest sequence name\r
+               for (FastaSequence fs : seqs) {\r
+                       String tmp = fs.getId();\r
+\r
+                       if (fs.getSequence().length() > max) {\r
+                               max = fs.getSequence().length();\r
+                       }\r
+                       if (tmp.length() > maxidLength) {\r
+                               maxidLength = tmp.length();\r
+                       }\r
+                       i++;\r
+               }\r
+               if (maxidLength < minNameHolderLength) {\r
+                       maxidLength = minNameHolderLength;\r
+               }\r
+               if (maxidLength > maxNameLength) {\r
+                       maxidLength = 30; // the rest will be trimmed\r
+               }\r
+\r
+               int oneLineAlignmentLength = 60;\r
+               int nochunks = max / oneLineAlignmentLength + 1;\r
+\r
+               for (i = 0; i < nochunks; i++) {\r
+                       int j = 0;\r
+                       for (FastaSequence fs : seqs) {\r
+\r
+                               String name = fs.getId();\r
+                               // display at most 30 characters in the name, keep the names\r
+                               // 6 spaces away from the alignment for longest sequence names,\r
+                               // and more than this for shorter names\r
+                               out.write(String.format(\r
+                                               "%-" + maxidLength + "s" + spacer,\r
+                                               (name.length() > maxNameLength ? name.substring(0,\r
+                                                               maxidLength) : name)));\r
+                               int start = i * oneLineAlignmentLength;\r
+                               int end = start + oneLineAlignmentLength;\r
+\r
+                               if (end < fs.getSequence().length()\r
+                                               && start < fs.getSequence().length()) {\r
+                                       out.write(fs.getSequence().substring(start, end) + "\n");\r
+                               } else {\r
+                                       if (start < fs.getSequence().length()) {\r
+                                               out.write(fs.getSequence().substring(start) + "\n");\r
+                                       }\r
+                               }\r
+                               j++;\r
+                       }\r
+                       out.write("\n");\r
+               }\r
+               try {\r
+                       out.close();\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, out);\r
+               }\r
        }\r
-       FileInputStream fio = new FileInputStream(file);\r
-       Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
-       try {\r
-           fio.close();\r
-       } finally {\r
-           SequenceUtil.closeSilently(log, fio);\r
+\r
+       public static Alignment readClustalFile(File file)\r
+                       throws UnknownFileFormatException, IOException {\r
+               if (file == null) {\r
+                       throw new NullPointerException("File is expected!");\r
+               }\r
+               FileInputStream fio = new FileInputStream(file);\r
+               Alignment seqAl = ClustalAlignmentUtil.readClustalFile(fio);\r
+               try {\r
+                       fio.close();\r
+               } finally {\r
+                       SequenceUtil.closeSilently(log, fio);\r
+               }\r
+               return seqAl;\r
        }\r
-       return seqAl;\r
-    }\r
 }\r