restructure file exporter
[jalview.git] / src / jalview / io / HMMFile.java
index 764db7f..7063fe9 100644 (file)
@@ -1,6 +1,6 @@
 package jalview.io;
 
-import jalview.datamodel.EValueStatistic;
+import jalview.datamodel.HMMNode;
 import jalview.datamodel.HiddenMarkovModel;
 
 import java.io.BufferedReader;
@@ -8,14 +8,12 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
-import java.io.PrintWriter;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Scanner;
 
+
 /**
  * reads in and writes out a HMMER standard file
  * 
@@ -28,18 +26,28 @@ public class HMMFile extends FileParse
   // HMM to store file data
   HiddenMarkovModel hmm = new HiddenMarkovModel();
 
+
   // Source of file
   String dataObject;
 
-  // number of symbols
-  int numberOfSymbols;
-
   // number of possible transitions
-  final int NUMBER_OF_TRANSITIONS = 7;
+  final static int NUMBER_OF_TRANSITIONS = 7;
+
+  final static String NEW_LINE = "\n";
+
 
   // file header
   String fileHeader;
 
+  int numberOfSymbols;
+
+  final static String SPACE = " ";
+
+  final static String COMPO = "COMPO";
+
+  final static String EMPTY = "";
+
+
   /**
    * Constructor which contains model to be filled or exported
    * 
@@ -51,6 +59,16 @@ public class HMMFile extends FileParse
     dataObject = dataSource;
   }
 
+  public HiddenMarkovModel getHmm()
+  {
+    return hmm;
+  }
+
+  public void setHmm(HiddenMarkovModel model)
+  {
+    this.hmm = model;
+  }
+
   /**
    * reads data from HMM file
    * 
@@ -66,6 +84,16 @@ public class HMMFile extends FileParse
 
   }
 
+  public String getDataObject()
+  {
+    return dataObject;
+  }
+
+  public void setDataObject(String value)
+  {
+    this.dataObject = value;
+  }
+
   /**
    * imports file properties from hmm file
    * 
@@ -88,30 +116,17 @@ public class HMMFile extends FileParse
                               // properties)
         {
           readingFile = false;
-          hmm.fillSymbols(line);
-          numberOfSymbols = hmm.getSymbols().size();
+          hmm.fillSymbols(parser);
+          numberOfSymbols = hmm.getNumberOfSymbols();
         }
-        else if ("STATS".equals(next)) // reads e-value stats into separate
-                                       // field
-                                     // on HMM object
+        else if ("STATS".equals(next))
         {
-          readStats(parser);
-        }
-        else if ("GA".equals(next) || "TC".equals(next)
-                || "NC".equals(next)) // reads
-                                                                            // pfam
-                                                                            // data
-                                                                            // into
-                                                                            // separate
-                                                                            // field
-                                                                            // on
-                                                                            // HMM
-                                                                            // object
-        {
-          Double[] data = new Double[2];
-          data[0] = parser.nextDouble();
-          data[1] = parser.nextDouble();
-          hmm.setPFAMData(next, data);
+          parser.next();
+          String key;
+          String value;
+          key = parser.next();
+          value = parser.next() + SPACE + SPACE + parser.next();
+          hmm.addFileProperty(key, value);
         }
         else
         {
@@ -119,9 +134,9 @@ public class HMMFile extends FileParse
           String value = parser.next();
           while (parser.hasNext())
           {
-            value = value + " " + parser.next();
+            value = value + SPACE + parser.next();
           }
-          hmm.put(key, value);
+          hmm.addFileProperty(key, value);
         }
         parser.close();
       }
@@ -135,31 +150,6 @@ public class HMMFile extends FileParse
   }
 
   /**
-   * creates a new EValueStatistic object to store stats
-   * 
-   * @param parser
-   *          Scanner which contains data for STATS line
-   * 
-   */
-  public void readStats(Scanner parser)
-  {
-    if (parser.hasNext())
-    {
-    String name;
-    double slope;
-    double location;
-    String configuration;
-
-    configuration = parser.next();
-    name = parser.next();
-    slope = parser.nextDouble();
-    location = parser.nextDouble();
-    hmm.addStatistic(name,
-            new EValueStatistic(configuration, slope, location));
-    }
-  }
-
-  /**
    * parses the model data from the hmm file
    * 
    * @param input
@@ -168,63 +158,46 @@ public class HMMFile extends FileParse
    */
   public void parseModel(BufferedReader input) throws IOException
   {
-
-    String line = input.readLine();
-    Scanner scanner = new Scanner(line);
-    String next = scanner.next();
-    if ("COMPO".equals(next)) // checks to and stores COMPO data if present
+    for (int i = 0; i < hmm.getLength() + 1; i++)
     {
-      for (int i = 0; i < numberOfSymbols; i++)
-
+      hmm.getNodes().add(new HMMNode());
+      String next;
+      String line;
+      line = input.readLine();
+      Scanner matchReader = new Scanner(line);
+      next = matchReader.next();
+      if (next.equals(COMPO) || i > 0)
       {
-        hmm.getAverageMatchStateEmissionProbabilities()
-                .add(scanner.nextDouble());
+        // stores match emission line in list
+        List<Double> matches = new ArrayList<>();
+        matches = fillList(matchReader, numberOfSymbols);
+        hmm.getNodes().get(i).setMatchEmissions(matches);
+        if (i > 0)
+        {
+          parseAnnotations(matchReader, i);
+        }
       }
-    }
-    scanner.close();
-    parseBeginNodeData(input);
-    for (int i = 0; i < hmm.getLength(); i++)
-    {
-      Scanner matchReader = new Scanner(input.readLine());
-      matchReader.nextInt(); // skips number indicating position in HMM
-      hmm.getMatchEmissions()
-              .add(fillList(matchReader, numberOfSymbols));
-      parseAnnotations(matchReader, i);
       matchReader.close();
-      Scanner insertReader = new Scanner(input.readLine());
-      hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols));
+      // stores insert emission line in list
+      line = input.readLine();
+      Scanner insertReader = new Scanner(line);
+      List<Double> inserts = new ArrayList<>();
+      inserts = fillList(insertReader, numberOfSymbols);
+      hmm.getNodes().get(i).setInsertEmissions(inserts);
       insertReader.close();
-      Scanner transitionReader = new Scanner(input.readLine());
-      hmm.getStateTransitions()
-              .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
+
+      // stores state transition line in list
+      line = input.readLine();
+      Scanner transitionReader = new Scanner(line);
+      List<Double> transitions = new ArrayList<>();
+      transitions = fillList(transitionReader, NUMBER_OF_TRANSITIONS);
+      hmm.getNodes().get(i).setStateTransitions(transitions);
       transitionReader.close();
     }
 
   }
 
   /**
-   * parses the begin state transitions and insert 0 emissions
-   * 
-   * @param input
-   *          buffered reader used to read model
-   * @param currentline
-   *          string contain all data on current line of buffered reader
-   * @throws IOException
-   */
-
-  public void parseBeginNodeData(BufferedReader input)
-          throws IOException
-  {
-    Scanner scanner = new Scanner(input.readLine());
-    hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size()));
-    scanner.close();
-    Scanner scannerTransitions = new Scanner(input.readLine());
-    hmm.setBeginStateTransitions(
-            fillList(scannerTransitions, NUMBER_OF_TRANSITIONS));
-    scannerTransitions.close();
-  }
-
-  /**
    * parses annotations on match emission line
    * 
    * @param scanner
@@ -234,20 +207,35 @@ public class HMMFile extends FileParse
    */
   public void parseAnnotations(Scanner scanner, int index)
   {
-    if (hmm.getMapAnnotationFlag())
+    if (hmm.mapIsActive())
     {
-      hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
+      int column;
+      column = scanner.nextInt();
+      hmm.getNodes().get(index).setAlignmentColumn(column);
     }
     else
     {
       scanner.next();
     }
-    hmm.getAnnotations().add(new HashMap<String, Character>());
-    hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0));
+
+    char consensusR;
+    consensusR = charValue(scanner.next());
+    hmm.getNodes().get(index).setConsensusResidue(consensusR);
+
+      char reference;
+      reference = charValue(scanner.next());
+      hmm.getNodes().get(index).setReferenceAnnotation(reference);
+
+
+      char value;
+      value = charValue(scanner.next());
+      hmm.getNodes().get(index).setMaskValue(value);
+
+    char consensusS;
+    consensusS = charValue(scanner.next());
+    hmm.getNodes().get(index).setConsensusStructure(consensusS);
   }
+
   /**
    * 
    * @param transition
@@ -298,17 +286,17 @@ public class HMMFile extends FileParse
           int numberOfElements)
   {
     List<Double> list = new ArrayList<>();
-    String next;
     for (int i = 0; i < numberOfElements; i++)
     {
-      next = input.next();
+
+      String next = input.next();
       if (next.contains("*")) // state transitions to or from delete states
                               // occasionally have values of -infinity. These
                               // values are represented by an * in the .hmm
                               // file, and by Double.NEGATIVE_INFINITY in the
                               // HiddenMarkovModel class
       {
-        list.add(null);
+        list.add(Double.NEGATIVE_INFINITY);
       }
       else
       {
@@ -318,111 +306,268 @@ public class HMMFile extends FileParse
     return list;
   }
 
+  
   /**
-   * writes a HiddenMarkovModel to a file. Needs mode work to make file more
-   * readable for humans (align columns)
+   * writes a HiddenMarkovModel to a file (NOTE: text is built but not yet saved)
    * 
    * @param exportLocation
    *          Filename, URL or Pasted String to write to
    * @throws FileNotFoundException
    * @throws UnsupportedEncodingException
-   */
-  public void exportFile(String exportLocation)
-          throws FileNotFoundException, UnsupportedEncodingException
+   *
+   **/
+  
+  public void exportFile(String exportLocation) throws IOException
   {
-    PrintWriter writer = new PrintWriter(exportLocation, "UTF-8");
-    writer.println(fileHeader);
-    for (Map.Entry<String, String> entry : hmm.getFileProperties()
-            .entrySet())
+    StringBuilder file = new StringBuilder();
+    appendFileProperties(file);
+    appendModel(file);
+    
+    file.append("//");
+
+  }
+
+  public String addData(int initialColumnSeparation,
+          int columnSeparation, List<String> data)
+  {
+    String line = EMPTY;
+    int index = 0;
+    for (String value : data)
     {
-      writer.println(entry.getKey() + " " + entry.getValue());
+      if (index == 0)
+      {
+        line += String.format("%" + initialColumnSeparation + "s", value);
+      }
+      else
+      {
+        line += String.format("%" + columnSeparation + "s", value);
+      }
+      index++;
     }
-    writer.println(
-            "HMM" + " " + convertCharListToString(hmm.getSymbols()));
-    writer.println("m->m m->i m->d i->m i->i d->m d->d");
-    if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
+    return line;
+  }
+
+  public static List<String> charListToStringList(List<Character> list)
+  {
+    List<String> strList = new ArrayList<>();
+    for (char value : list)
     {
-      writer.println("COMPO" + " " + convertDoubleListToString(
-              hmm.getAverageMatchStateEmissionProbabilities()));
+      String strValue = Character.toString(value);
+      strList.add(strValue);
     }
-    writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions()));
-    writer.println(
-            convertDoubleListToString(hmm.getBeginStateTransitions()));
+    return strList;
+  }
 
-    for (Integer i = 0; i < hmm.getLength(); i++)
+  public static List<String> doubleListToStringList(List<Double> list,
+          int noOfDecimals)
+  {
+    List<String> strList = new ArrayList<>();
+    for (double value : list)
     {
-      String matchEmissionLine = i.toString() + " "; // adds node index
-      matchEmissionLine += convertDoubleListToString(
-              hmm.getMatchEmissions().get(i)); // adds match emissions
-      matchEmissionLine += " "
-              + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP
-                                                                   // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS
-                                                                    // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF
-                                                                  // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM
-                                                                  // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS
-                                                                  // annotation
-      writer.println(matchEmissionLine);
-
-      writer.println(
-              convertDoubleListToString(hmm.getInsertEmissions().get(i)));
-      writer.println(
-              convertDoubleListToString(hmm.getStateTransitions().get(i)));
-    }
-    writer.println("//");
+      String strValue;
+      if (value == Double.NEGATIVE_INFINITY)
+      {
+        strValue = "*";
+      }
+      else
+      {
+        strValue = String.format("%.5f", value);
+      }
 
-    writer.close();
+      strList.add(strValue);
+    }
+    return strList;
   }
 
-  /**
-   * converts an list of characters to a string with items separated by spaces
-   * 
-   * @param list
-   *          character list to be converted
-   * @return string value of char list
-   */
-  public String convertCharListToString(List<Character> list)
+  public static List<String> stringArrayToStringList(String[] array)
   {
-    String string = "";
-    for (Character item : list)
+    List<String> list = new ArrayList<>();
+    for (String value : array)
     {
-      string = string + item.toString() + " ";
+      list.add(value);
     }
 
-    return string;
+    return list;
   }
-  
-  /**
-   * converts an list of doubles to a string with items separated by spaces
-   * 
-   * @param list
-   *          double list to be converted
-   * @return string value of double list
-   */
-  public String convertDoubleListToString(List<Double> list)
+
+  void appendModel(StringBuilder file)
   {
-    String string = "";
-    for (Double item : list)
+    String symbolLine = "HMM";
+    List<Character> charSymbols = hmm.getSymbols();
+    List<String> strSymbols;
+    strSymbols = charListToStringList(charSymbols);
+    symbolLine += addData(11, 9, strSymbols);
+    file.append(symbolLine + NEW_LINE);
+
+    String transitionTypeLine = "";
+    List<String> transitionTypes;
+    transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
+    transitionTypeLine += addData(16, 9, transitionTypes);
+    file.append(transitionTypeLine + NEW_LINE);
+
+    int length = hmm.getLength();
+
+    for (int node = 0; node <= length; node++)
     {
-      if (item != null)
+      String matchLine;
+      if (node == 0)
       {
-        string = string + item.toString() + " ";
+        matchLine = String.format("%7s", "COMPO");
       }
       else
       {
-        string = string + "*" + " ";
+        matchLine = String.format("%7s", node);
+      }
+
+      List<String> strMatches;
+      List<Double> doubleMatches;
+      doubleMatches = hmm.getNode(node).getMatchEmissions();
+      strMatches = doubleListToStringList(doubleMatches, 5);
+      matchLine += addData(10, 9, strMatches);
+
+
+      if (node != 0)
+      {
+        matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
+        matchLine += SPACE + hmm.getConsensusResidue(node);
+        matchLine += SPACE + hmm.getReferenceAnnotation(node);
+        matchLine += SPACE + hmm.getMaskedValue(node);
+        matchLine += SPACE + hmm.getConsensusStructure(node);
+
       }
 
+      file.append(matchLine + NEW_LINE);
+      
+      String insertLine = EMPTY;
+      List<String> strInserts;
+      List<Double> doubleInserts;
+      doubleInserts = hmm.getNode(node).getInsertEmissions();
+      strInserts = doubleListToStringList(doubleInserts, 5);
+      insertLine += addData(17, 9, strInserts);
+
+      file.append(insertLine + NEW_LINE);
+
+      String transitionLine = EMPTY;
+      List<String> strTransitions;
+      List<Double> doubleTransitions;
+      doubleTransitions = hmm.getNode(node).getStateTransitions();
+      strTransitions = doubleListToStringList(doubleTransitions, 5);
+      transitionLine += addData(17, 9, strTransitions);
+
+      file.append(transitionLine + NEW_LINE);
     }
+  }
+
+  void appendFileProperties(StringBuilder file)
+  {
+    String line;
+
+    file.append(fileHeader + NEW_LINE);
+    
+    line = String.format("%-5s %1s", "NAME", hmm.getName());
+    file.append((line + NEW_LINE));
 
-    return string;
+    if (hmm.getAccessionNumber() != null)
+    {
+    line = String.format("%-5s %1s", "ACC", hmm.getAccessionNumber());
+    file.append((line + NEW_LINE));
+    }
+
+    if (hmm.getDescription() != null)
+    {
+    line = String.format("%-5s %1s", "DESC", hmm.getDescription());
+    file.append((line + NEW_LINE));
+    }
+    line = String.format("%-5s %1s", "LENG", hmm.getLength());
+    file.append((line + NEW_LINE));
+
+    if (hmm.getMaxInstanceLength() != null)
+    {
+    line = String.format("%-5s %1s", "MAXL", hmm.getMaxInstanceLength());
+    file.append((line + NEW_LINE));
+    }
+    line = String.format("%-5s %1s", "ALPH", hmm.getAlphabetType());
+    file.append((line + NEW_LINE));
+
+    line = String.format("%-5s %1s", "RF",
+            hmm.getFileProperties().get("RF"));
+    file.append((line + NEW_LINE));
+
+    line = String.format("%-5s %1s", "MM",
+            hmm.getFileProperties().get("MM"));
+    file.append((line + NEW_LINE));
+    
+    line = String.format("%-5s %1s", "CONS",
+            hmm.getFileProperties().get("CONS"));
+    file.append((line + NEW_LINE));
+
+    line = String.format("%-5s %1s", "CS",
+            hmm.getFileProperties().get("CS"));
+    file.append((line + NEW_LINE));
+
+    line = String.format("%-5s %1s", "MAP",
+            hmm.getFileProperties().get("MAP"));
+    file.append((line + NEW_LINE));
+
+    if (hmm.getDate() != null)
+    {
+    line = String.format("%-5s %1s", "DATE", hmm.getDate());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getNumberOfSequences() != null)
+    {
+    line = String.format("%-5s %1s", "NSEQ", hmm.getNumberOfSequences());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getEffectiveNumberOfSequences() != null)
+    {
+    line = String.format("%-5s %1s", "EFFN",
+            hmm.getEffectiveNumberOfSequences());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getCheckSum() != null)
+    {
+    line = String.format("%-5s %1s", "CKSUM", hmm.getCheckSum());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getGatheringThreshold() != null)
+    {
+    line = String.format("%-5s %1s", "GA", hmm.getGatheringThreshold());
+    file.append((line + NEW_LINE));
+    }
+
+    if (hmm.getTrustedCutoff() != null)
+    {
+    line = String.format("%-5s %1s", "TC", hmm.getTrustedCutoff());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getNoiseCutoff() != null)
+    {
+    line = String.format("%-5s %1s", "NC", hmm.getNoiseCutoff());
+    file.append((line + NEW_LINE));
+    }
+    if (hmm.getMSV() != null)
+    {
+      line = String.format("%-19s %18s", "STATS LOCAL MSV", hmm.getMSV());
+      file.append((line + NEW_LINE));
+
+      line = String.format("%-19s %18s", "STATS LOCAL VITERBI",
+              hmm.getViterbi());
+      file.append((line + NEW_LINE));
+    
+      line = String.format("%-19s %18s", "STATS LOCAL FORWARD",
+              hmm.getForward());
+      file.append((line + NEW_LINE));
+    }
+  }
+
+
+
+  public static char charValue(String string)
+  {
+    char character;
+    character = string.charAt(0);
+    return character;
   }
 }