src/jalview/io/ClustalFile.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.io;
  22
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.util.Format;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.regex.Pattern;
  33
  34 public class ClustalFile extends AlignFile
  35 {
  36
  37   public ClustalFile()
  38   {
  39   }
  40
  41   public ClustalFile(String inFile, DataSourceType sourceType)
  42           throws IOException
  43   {
  44     super(inFile, sourceType);
  45   }
  46
  47   public ClustalFile(FileParse source) throws IOException
  48   {
  49     super(source);
  50   }
  51
  52   @Override
  53   public void initData()
  54   {
  55     super.initData();
  56   }
  57
  58   @Override
  59   public void parse() throws IOException
  60   {
  61     int i = 0;
  62     boolean flag = false;
  63     boolean top = false;
  64     StringBuffer pssecstr = new StringBuffer();
  65     StringBuffer consstr = new StringBuffer();
  66     Vector<String> headers = new Vector<>();
  67     Map<String, StringBuffer> seqhash = new HashMap<>();
  68     StringBuffer tempseq;
  69     String line, id;
  70     StringTokenizer str;
  71
  72     try
  73     {
  74       while ((line = nextLine()) != null)
  75       {
  76         if (line.length() == 0)
  77         {
  78           top = true;
  79         }
  80         boolean isConservation = line.startsWith(SPACE)
  81                 || line.startsWith(TAB);
  82         if (!isConservation)
  83         {
  84           str = new StringTokenizer(line);
  85
  86           if (str.hasMoreTokens())
  87           {
  88             id = str.nextToken();
  89
  90             if (id.equalsIgnoreCase("CLUSTAL"))
  91             {
  92               flag = true;
  93             }
  94             else
  95             {
  96               if (flag)
  97               {
  98                 if (seqhash.containsKey(id))
  99                 {
 100                   tempseq = seqhash.get(id);
 101                 }
 102                 else
 103                 {
 104                   tempseq = new StringBuffer();
 105                   seqhash.put(id, tempseq);
 106                 }
 107
 108                 if (!(headers.contains(id)))
 109                 {
 110                   headers.addElement(id);
 111                 }
 112
 113                 if (str.hasMoreTokens())
 114                 {
 115                   tempseq.append(str.nextToken());
 116                 }
 117                 top = false;
 118               }
 119             }
 120           }
 121           else
 122           {
 123             flag = true;
 124           }
 125         }
 126         else
 127         {
 128           if (line.matches("\\s+(-|\\.|\\(|\\[|\\]|\\))+"))
 129           {
 130             if (top)
 131             {
 132               pssecstr.append(line.trim());
 133             }
 134             else
 135             {
 136               consstr.append(line.trim());
 137             }
 138           }
 139         }
 140       }
 141     } catch (IOException e)
 142     {
 143       System.err.println("Exception parsing clustal file " + e);
 144       e.printStackTrace();
 145     }
 146
 147     if (flag)
 148     {
 149       this.noSeqs = headers.size();
 150
 151       // Add sequences to the hash
 152       for (i = 0; i < headers.size(); i++)
 153       {
 154         if (seqhash.get(headers.elementAt(i)) != null)
 155         {
 156           if (maxLength < seqhash.get(headers.elementAt(i)).toString()
 157                   .length())
 158           {
 159             maxLength = seqhash.get(headers.elementAt(i)).toString()
 160                     .length();
 161           }
 162
 163           Sequence newSeq = parseId(headers.elementAt(i).toString());
 164           newSeq.setSequence(
 165                   seqhash.get(headers.elementAt(i).toString()).toString());
 166
 167           seqs.addElement(newSeq);
 168         }
 169         else
 170         {
 171           System.err.println("Clustal File Reader: Can't find sequence for "
 172                   + headers.elementAt(i));
 173         }
 174       }
 175       AlignmentAnnotation lastssa = null;
 176       if (pssecstr.length() == maxLength)
 177       {
 178         Vector<AlignmentAnnotation> ss = new Vector<>();
 179         AlignmentAnnotation ssa = lastssa = StockholmFile
 180                 .parseAnnotationRow(ss, "secondary structure",
 181                         pssecstr.toString());
 182         ssa.label = "Secondary Structure";
 183         annotations.addElement(ssa);
 184       }
 185       if (consstr.length() == maxLength)
 186       {
 187         Vector<AlignmentAnnotation> ss = new Vector<>();
 188         AlignmentAnnotation ssa = StockholmFile.parseAnnotationRow(ss,
 189                 "secondary structure", consstr.toString());
 190         ssa.label = "Consensus Secondary Structure";
 191         if (lastssa == null || !lastssa.getRNAStruc()
 192                 .equals(ssa.getRNAStruc().replace('-', '.')))
 193         {
 194           annotations.addElement(ssa);
 195         }
 196       }
 197     }
 198   }
 199
 200   @Override
 201   public String print(SequenceI[] s, boolean jvsuffix)
 202   {
 203     StringBuffer out = new StringBuffer("CLUSTAL" + newline + newline);
 204
 205     int max = 0;
 206     int maxid = 0;
 207
 208     int i = 0;
 209
 210     while ((i < s.length) && (s[i] != null))
 211     {
 212       String tmp = printId(s[i], jvsuffix);
 213
 214       max = Math.max(max, s[i].getLength());
 215
 216       if (tmp.length() > maxid)
 217       {
 218         maxid = tmp.length();
 219       }
 220
 221       i++;
 222     }
 223
 224     if (maxid < 15)
 225     {
 226       maxid = 15;
 227     }
 228
 229     maxid++;
 230
 231     int len = 60;
 232     int nochunks = (max / len) + (max % len > 0 ? 1 : 0);
 233
 234     for (i = 0; i < nochunks; i++)
 235     {
 236       int j = 0;
 237
 238       while ((j < s.length) && (s[j] != null))
 239       {
 240         out.append(new Format("%-" + maxid + "s")
 241                 .form(printId(s[j], jvsuffix) + " "));
 242
 243         int chunkStart = i * len;
 244         int chunkEnd = chunkStart + len;
 245
 246         int length = s[j].getLength();
 247         if ((chunkEnd < length) && (chunkStart < length))
 248         {
 249           out.append(s[j].getSequenceAsString(chunkStart, chunkEnd));
 250         }
 251         else
 252         {
 253           if (chunkStart < length)
 254           {
 255             out.append(s[j].getSequenceAsString().substring(chunkStart));
 256           }
 257         }
 258
 259         out.append(newline);
 260         j++;
 261       }
 262
 263       out.append(newline);
 264     }
 265
 266     return out.toString();
 267   }
 268 }