transferred the storage position of the HMM to annotations
[jalview.git] / src / jalview / io / HMMFile.java
index 764db7f..6945ea6 100644 (file)
@@ -1,21 +1,19 @@
 package jalview.io;
 
-import jalview.datamodel.EValueStatistic;
+import jalview.datamodel.HMMNode;
 import jalview.datamodel.HiddenMarkovModel;
+import jalview.datamodel.SequenceI;
 
 import java.io.BufferedReader;
-import java.io.File;
 import java.io.FileNotFoundException;
-import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Scanner;
 
+
 /**
  * reads in and writes out a HMMER standard file
  * 
@@ -23,32 +21,55 @@ import java.util.Scanner;
  * @author TZVanaalten
  *
  */
-public class HMMFile extends FileParse
+public class HMMFile extends AlignFile
+        implements AlignmentFileReaderI, AlignmentFileWriterI
 {
   // HMM to store file data
-  HiddenMarkovModel hmm = new HiddenMarkovModel();
+  private HiddenMarkovModel hmm = new HiddenMarkovModel();
+
 
-  // Source of file
-  String dataObject;
 
-  // number of symbols
-  int numberOfSymbols;
 
   // number of possible transitions
-  final int NUMBER_OF_TRANSITIONS = 7;
+  private final int NUMBER_OF_TRANSITIONS = 7;
+
+  private final String NEW_LINE = "\n";
+
 
   // file header
   String fileHeader;
 
-  /**
-   * Constructor which contains model to be filled or exported
-   * 
-   * @param dataSource
-   *          Filename, URL or Pasted String to read from
-   */
-  public HMMFile(String dataSource)
+  int numberOfSymbols;
+
+  private final String SPACE = " ";
+
+  private final String COMPO = "COMPO";
+
+  private final String EMPTY = "";
+
+  /**
+   * Creates an HMMFile that reads from the given source. The source is
+   * handed to the AlignFile constructor together with the flag value false.
+   *
+   * @param source
+   *          the file source the HMM is read from
+   * @throws IOException
+   *           if the superclass constructor reports an I/O failure
+   */
+  public HMMFile(FileParse source) throws IOException
+  {
+    // NOTE(review): the meaning of the 'false' flag is defined by AlignFile
+    // (presumably "do not parse immediately") - confirm against AlignFile
+    super(false, source);
+  }
+
+  /**
+   * Creates an HMMFile without supplying a data source; the model field
+   * keeps its initial, newly constructed HiddenMarkovModel.
+   */
+  public HMMFile()
+  {
+
+  }
+
+  /**
+   * @return the model currently held by this file object
+   */
+  public HiddenMarkovModel getHMM()
+  {
+    return hmm;
+  }
+
+  /**
+   * Replaces the model held by this file object.
+   *
+   * @param model
+   *          the model to hold; the reference is stored, not copied
+   */
+  public void setHMM(HiddenMarkovModel model)
   {
-    dataObject = dataSource;
+    this.hmm = model;
+  }
+
+  /**
+   * @return the name reported by the held model
+   */
+  public String getName()
+  {
+    return hmm.getName();
   }
 
   /**
@@ -56,16 +77,15 @@ public class HMMFile extends FileParse
    * 
    * @throws IOException
    */
+  @Override
   public void parse() throws IOException
   {
-    File file = new File(dataObject);
-    FileReader fr = new FileReader(file);
-    BufferedReader br = new BufferedReader(fr);
-    parseFileProperties(br);
-    parseModel(br);
-
+    parseFileProperties(dataIn);
+    parseModel(dataIn);
   }
 
+
+
   /**
    * imports file properties from hmm file
    * 
@@ -73,7 +93,7 @@ public class HMMFile extends FileParse
    *          buffered reader used to read in file
    * @throws IOException
    */
-  public void parseFileProperties(BufferedReader input) throws IOException
+  void parseFileProperties(BufferedReader input) throws IOException
   {
     boolean readingFile = true;
     fileHeader = input.readLine();
@@ -88,30 +108,17 @@ public class HMMFile extends FileParse
                               // properties)
         {
           readingFile = false;
-          hmm.fillSymbols(line);
-          numberOfSymbols = hmm.getSymbols().size();
+          hmm.fillSymbols(parser);
+          numberOfSymbols = hmm.getNumberOfSymbols();
         }
-        else if ("STATS".equals(next)) // reads e-value stats into separate
-                                       // field
-                                     // on HMM object
+        else if ("STATS".equals(next))
         {
-          readStats(parser);
-        }
-        else if ("GA".equals(next) || "TC".equals(next)
-                || "NC".equals(next)) // reads
-                                                                            // pfam
-                                                                            // data
-                                                                            // into
-                                                                            // separate
-                                                                            // field
-                                                                            // on
-                                                                            // HMM
-                                                                            // object
-        {
-          Double[] data = new Double[2];
-          data[0] = parser.nextDouble();
-          data[1] = parser.nextDouble();
-          hmm.setPFAMData(next, data);
+          parser.next();
+          String key;
+          String value;
+          key = parser.next();
+          value = parser.next() + SPACE + SPACE + parser.next();
+          hmm.addFileProperty(key, value);
         }
         else
         {
@@ -119,9 +126,9 @@ public class HMMFile extends FileParse
           String value = parser.next();
           while (parser.hasNext())
           {
-            value = value + " " + parser.next();
+            value = value + SPACE + parser.next();
           }
-          hmm.put(key, value);
+          hmm.addFileProperty(key, value);
         }
         parser.close();
       }
@@ -135,96 +142,54 @@ public class HMMFile extends FileParse
   }
 
   /**
-   * creates a new EValueStatistic object to store stats
-   * 
-   * @param parser
-   *          Scanner which contains data for STATS line
-   * 
-   */
-  public void readStats(Scanner parser)
-  {
-    if (parser.hasNext())
-    {
-    String name;
-    double slope;
-    double location;
-    String configuration;
-
-    configuration = parser.next();
-    name = parser.next();
-    slope = parser.nextDouble();
-    location = parser.nextDouble();
-    hmm.addStatistic(name,
-            new EValueStatistic(configuration, slope, location));
-    }
-  }
-
-  /**
    * parses the model data from the hmm file
    * 
    * @param input
    *          buffered reader used to read file
    * @throws IOException
    */
-  public void parseModel(BufferedReader input) throws IOException
+  void parseModel(BufferedReader input) throws IOException
   {
-
-    String line = input.readLine();
-    Scanner scanner = new Scanner(line);
-    String next = scanner.next();
-    if ("COMPO".equals(next)) // checks to and stores COMPO data if present
+    // one node is created per iteration: index 0 first, then 1..getLength()
+    for (int i = 0; i < hmm.getLength() + 1; i++)
     {
-      for (int i = 0; i < numberOfSymbols; i++)
-
+      hmm.getNodes().add(new HMMNode());
+      Scanner matchReader = new Scanner(readModelLine(input));
+      // first token: the COMPO keyword or (presumably) the node number
+      String next = matchReader.next();
+      // NOTE(review): when the first line is not a COMPO line it is still
+      // consumed here and its values are dropped - confirm that COMPO is
+      // always present in the files this reader must support
+      if (next.equals(COMPO) || i > 0)
       {
-        hmm.getAverageMatchStateEmissionProbabilities()
-                .add(scanner.nextDouble());
+        // stores match emission line in list
+        List<Double> matches = fillList(matchReader, numberOfSymbols);
+        hmm.getNodes().get(i).setMatchEmissions(matches);
+        if (i > 0)
+        {
+          parseAnnotations(matchReader, i);
+        }
       }
-    }
-    scanner.close();
-    parseBeginNodeData(input);
-    for (int i = 0; i < hmm.getLength(); i++)
-    {
-      Scanner matchReader = new Scanner(input.readLine());
-      matchReader.nextInt(); // skips number indicating position in HMM
-      hmm.getMatchEmissions()
-              .add(fillList(matchReader, numberOfSymbols));
-      parseAnnotations(matchReader, i);
       matchReader.close();
-      Scanner insertReader = new Scanner(input.readLine());
-      hmm.getInsertEmissions().add(fillList(insertReader, numberOfSymbols));
+      // stores insert emission line in list
+      Scanner insertReader = new Scanner(readModelLine(input));
+      List<Double> inserts = fillList(insertReader, numberOfSymbols);
+      hmm.getNodes().get(i).setInsertEmissions(inserts);
       insertReader.close();
-      Scanner transitionReader = new Scanner(input.readLine());
-      hmm.getStateTransitions()
-              .add(fillList(transitionReader, NUMBER_OF_TRANSITIONS));
+
+      // stores state transition line in list
+      Scanner transitionReader = new Scanner(readModelLine(input));
+      List<Double> transitions = fillList(transitionReader,
+              NUMBER_OF_TRANSITIONS);
+      hmm.getNodes().get(i).setStateTransitions(transitions);
       transitionReader.close();
     }
 
   }
+
+  /**
+   * Reads the next line of the model section, failing with an IOException
+   * instead of letting a null line reach the Scanner constructor (which
+   * would throw a NullPointerException) when the file ends early.
+   *
+   * @param input
+   *          buffered reader used to read file
+   * @return the line that was read, never null
+   * @throws IOException
+   *           if the reader fails or the end of the file has been reached
+   */
+  private String readModelLine(BufferedReader input) throws IOException
+  {
+    String line = input.readLine();
+    if (line == null)
+    {
+      throw new IOException("Unexpected end of HMM file in model section");
+    }
+    return line;
+  }
 
   /**
-   * parses the begin state transitions and insert 0 emissions
-   * 
-   * @param input
-   *          buffered reader used to read model
-   * @param currentline
-   *          string contain all data on current line of buffered reader
-   * @throws IOException
-   */
-
-  public void parseBeginNodeData(BufferedReader input)
-          throws IOException
-  {
-    Scanner scanner = new Scanner(input.readLine());
-    hmm.setInsertZeroEmissions(fillList(scanner, hmm.getSymbols().size()));
-    scanner.close();
-    Scanner scannerTransitions = new Scanner(input.readLine());
-    hmm.setBeginStateTransitions(
-            fillList(scannerTransitions, NUMBER_OF_TRANSITIONS));
-    scannerTransitions.close();
-  }
-
-  /**
    * parses annotations on match emission line
    * 
    * @param scanner
@@ -232,60 +197,39 @@ public class HMMFile extends FileParse
    * @param index
    *          index of node which is beign scanned
    */
-  public void parseAnnotations(Scanner scanner, int index)
+  void parseAnnotations(Scanner scanner, int index)
   {
-    if (hmm.getMapAnnotationFlag())
+    if (hmm.mapIsActive())
     {
-      hmm.getAlignmentColumnIndexes().add(scanner.nextInt());
+      // MAP column: stored on the node and in the column -> node lookup
+      final int alignmentColumn = scanner.nextInt();
+      hmm.getNodes().get(index).setAlignmentColumn(alignmentColumn);
+      hmm.getNodeLookup().put(alignmentColumn, index);
     }
     else
     {
       scanner.next();
     }
-    hmm.getAnnotations().add(new HashMap<String, Character>());
-    hmm.getAnnotations().get(index).put("CONS", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("RF", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("MM", scanner.next().charAt(0));
-    hmm.getAnnotations().get(index).put("CS", scanner.next().charAt(0));
-  }
-  /**
-   * 
-   * @param transition
-   *          type of transition occuring
-   * @return index value representing position along stateTransition array.
-   */
-  public Integer getTransitionType(String transition)
-  {
-    Integer index;
-    switch (transition)
-    {
-    case "mm":
-      index = 0;
-      break;
-    case "mi":
-      index = 1;
-      break;
-    case "md":
-      index = 2;
-      break;
-    case "im":
-      index = 3;
-      break;
-    case "ii":
-      index = 4;
-      break;
-    case "dm":
-      index = 5;
-      break;
-    case "dd":
-      index = 6;
-      break;
-    default:
-      index = null;
-    }
-    return index;
+
+    // the next four tokens each contribute their first character, in this
+    // order: consensus residue, reference annotation, mask value,
+    // consensus structure
+    final char residue = charValue(scanner.next());
+    hmm.getNodes().get(index).setConsensusResidue(residue);
+
+    final char referenceFlag = charValue(scanner.next());
+    hmm.getNodes().get(index).setReferenceAnnotation(referenceFlag);
+
+    final char maskFlag = charValue(scanner.next());
+    hmm.getNodes().get(index).setMaskValue(maskFlag);
+
+    final char structure = charValue(scanner.next());
+    hmm.getNodes().get(index).setConsensusStructure(structure);
   }
 
+
   /**
    * 
    * @param input
@@ -294,135 +238,340 @@ public class HMMFile extends FileParse
    *          number of elements in the list to be filled
    * @return filled list
    */
-  public static List<Double> fillList(Scanner input,
+  static List<Double> fillList(Scanner input,
           int numberOfElements)
   {
     List<Double> list = new ArrayList<>();
-    String next;
     for (int i = 0; i < numberOfElements; i++)
     {
-      next = input.next();
+
+      String next = input.next();
       if (next.contains("*")) // state transitions to or from delete states
                               // occasionally have values of -infinity. These
                               // values are represented by an * in the .hmm
                               // file, and by a null value in the
                               // HiddenMarkovModel class
       {
-        list.add(null);
+        // the '*' placeholder is now stored as Double.NEGATIVE_INFINITY,
+        // no longer as null (the comment above predates this change)
+        // NOTE(review): the other branch stores e^(-value), for which the
+        // matching limit would be 0.0 - confirm which sentinel readers of
+        // these lists expect
+        list.add(Double.NEGATIVE_INFINITY);
       }
       else
       {
-        list.add(Double.valueOf(next));
+        // the token is treated as a negative natural log: the value that is
+        // stored is e raised to the power of minus the parsed number
+        double prob = Double.valueOf(next);
+        prob = Math.pow(Math.E, -prob);
+        list.add(prob);
       }
     }
     return list;
   }
 
+  
   /**
-   * writes a HiddenMarkovModel to a file. Needs mode work to make file more
-   * readable for humans (align columns)
+   * writes a HiddenMarkovModel to a file
    * 
    * @param exportLocation
    *          Filename, URL or Pasted String to write to
    * @throws FileNotFoundException
    * @throws UnsupportedEncodingException
-   */
-  public void exportFile(String exportLocation)
-          throws FileNotFoundException, UnsupportedEncodingException
+   *
+   **/
+  
+  public void exportFile(String exportLocation) throws IOException
+  {
+    // assemble the whole file in memory: properties, model, then terminator
+    StringBuilder file = new StringBuilder();
+    appendFileProperties(file);
+    appendModel(file);
+    file.append("//");
+
+    // try-with-resources closes (and so flushes) the writer even if the
+    // append fails; the charset is pinned to UTF-8, as the previous
+    // implementation did, instead of depending on the platform default
+    try (PrintWriter output = new PrintWriter(exportLocation, "UTF-8"))
+    {
+      output.append(file);
+      // PrintWriter records write failures instead of throwing them, so
+      // the error flag is checked to avoid a silently truncated export
+      if (output.checkError())
+      {
+        throw new IOException(
+                "Failed to write HMM file to " + exportLocation);
+      }
+    }
+  }
+
+  /**
+   * Lays out the given values as right-justified columns on a single line.
+   *
+   * @param initialColumnSeparation
+   *          field width used for the first value
+   * @param columnSeparation
+   *          field width used for every later value
+   * @param data
+   *          values to lay out, in order
+   * @return the formatted line, empty when there are no values
+   */
+  String addData(int initialColumnSeparation,
+          int columnSeparation, List<String> data)
   {
-    PrintWriter writer = new PrintWriter(exportLocation, "UTF-8");
-    writer.println(fileHeader);
-    for (Map.Entry<String, String> entry : hmm.getFileProperties()
-            .entrySet())
+    StringBuilder line = new StringBuilder();
+    boolean firstColumn = true;
+    for (String value : data)
     {
-      writer.println(entry.getKey() + " " + entry.getValue());
+      // the first column has its own width; all others share one
+      int width = columnSeparation;
+      if (firstColumn)
+      {
+        width = initialColumnSeparation;
+        firstColumn = false;
+      }
+      line.append(String.format("%" + width + "s", value));
     }
-    writer.println(
-            "HMM" + " " + convertCharListToString(hmm.getSymbols()));
-    writer.println("m->m m->i m->d i->m i->i d->m d->d");
-    if (false == hmm.getAverageMatchStateEmissionProbabilities().isEmpty())
+    return line.toString();
+  }
+
+  /**
+   * Converts a list of characters into a list of one-character strings,
+   * keeping the order.
+   *
+   * @param list
+   *          characters to convert
+   * @return a new list holding one string per character
+   */
+  List<String> charListToStringList(List<Character> list)
+  {
+    List<String> symbols = new ArrayList<>();
+    for (char symbol : list)
     {
-      writer.println("COMPO" + " " + convertDoubleListToString(
-              hmm.getAverageMatchStateEmissionProbabilities()));
+      symbols.add(String.valueOf(symbol));
     }
-    writer.println(convertDoubleListToString(hmm.getInsertZeroEmissions()));
-    writer.println(
-            convertDoubleListToString(hmm.getBeginStateTransitions()));
+    return symbols;
+  }
 
-    for (Integer i = 0; i < hmm.getLength(); i++)
+  /**
+   * Converts a list of numbers into their textual form for the file.
+   * Finite values greater than or equal to zero are written in fixed-point
+   * notation; every other value (negative, NaN or infinite) is written as
+   * "*".
+   *
+   * @param list
+   *          values to convert; must not contain null elements
+   * @param noOfDecimals
+   *          number of digits written after the decimal point
+   * @return a new list holding one string per value, in the same order
+   */
+  List<String> doubleListToStringList(List<Double> list,
+          int noOfDecimals)
+  {
+    // Locale.ROOT keeps '.' as the decimal separator whatever the default
+    // locale is, so the text can be read back with Double.valueOf
+    String format = "%." + noOfDecimals + "f";
+    List<String> strList = new ArrayList<>();
+    for (double value : list)
     {
-      String matchEmissionLine = i.toString() + " "; // adds node index
-      matchEmissionLine += convertDoubleListToString(
-              hmm.getMatchEmissions().get(i)); // adds match emissions
-      matchEmissionLine += " "
-              + hmm.getAlignmentColumnIndexes().get(i).toString(); // adds MAP
-                                                                   // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("CONS").toString(); // adds CONS
-                                                                    // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("RF").toString(); // adds RF
-                                                                  // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("MM").toString(); // adds MM
-                                                                  // annotation
-      matchEmissionLine += " "
-              + hmm.getAnnotations().get(i).get("CS").toString(); // adds CS
-                                                                  // annotation
-      writer.println(matchEmissionLine);
-
-      writer.println(
-              convertDoubleListToString(hmm.getInsertEmissions().get(i)));
-      writer.println(
-              convertDoubleListToString(hmm.getStateTransitions().get(i)));
-    }
-    writer.println("//");
+      String strValue;
+      // positive infinity must not be printed as "Infinity"; it falls
+      // through to the "*" branch below
+      if (value > 0 && !Double.isInfinite(value))
+      {
+        strValue = String.format(java.util.Locale.ROOT, format, value);
+      }
+      else if (value == -0.00000d)
+      {
+        // matches both 0.0 and -0.0; always written without a sign
+        strValue = String.format(java.util.Locale.ROOT, format, 0d);
+      }
+      else
+      {
+        strValue = "*";
+      }
 
-    writer.close();
+      strList.add(strValue);
+    }
+    return strList;
   }
 
-  /**
-   * converts an list of characters to a string with items separated by spaces
-   * 
-   * @param list
-   *          character list to be converted
-   * @return string value of char list
-   */
-  public String convertCharListToString(List<Character> list)
+  /**
+   * Copies the elements of an array into a new list, keeping the order.
+   *
+   * @param array
+   *          strings to copy
+   * @return a new list holding the array's elements
+   */
+  List<String> stringArrayToStringList(String[] array)
   {
-    String string = "";
-    for (Character item : list)
+    List<String> copied = new ArrayList<>();
+    for (int i = 0; i < array.length; i++)
     {
-      string = string + item.toString() + " ";
+      copied.add(array[i]);
     }
 
-    return string;
+    return copied;
   }
-  
-  /**
-   * converts an list of doubles to a string with items separated by spaces
-   * 
-   * @param list
-   *          double list to be converted
-   * @return string value of double list
-   */
-  public String convertDoubleListToString(List<Double> list)
+
+  /**
+   * Appends the model section to the buffer: the symbol line, the
+   * transition-type line and then, for node 0 (written with the COMPO label)
+   * up to node hmm.getLength(), a match emission line, an insert emission
+   * line and a state transition line.
+   *
+   * @param file
+   *          buffer the model text is appended to
+   */
+  void appendModel(StringBuilder file)
   {
-    String string = "";
-    for (Double item : list)
+    String symbolLine = "HMM";
+    List<Character> charSymbols = hmm.getSymbols();
+    List<String> strSymbols;
+    strSymbols = charListToStringList(charSymbols);
+    symbolLine += addData(11, 9, strSymbols);
+    file.append(symbolLine + NEW_LINE);
+
+    String transitionTypeLine = "";
+    List<String> transitionTypes;
+    transitionTypes = stringArrayToStringList(hmm.getTransitionTypes());
+    transitionTypeLine += addData(16, 9, transitionTypes);
+    file.append(transitionTypeLine + NEW_LINE);
+
+    int length = hmm.getLength();
+
+    for (int node = 0; node <= length; node++)
     {
-      if (item != null)
+      String matchLine;
+      if (node == 0)
       {
-        string = string + item.toString() + " ";
+        matchLine = String.format("%7s", "COMPO");
       }
       else
       {
-        string = string + "*" + " ";
+        matchLine = String.format("%7s", node);
       }
 
+      // convertListToLogSpace overwrites the list it is given. Each list is
+      // therefore copied before conversion, so that exporting cannot alter
+      // the values held by the node if the getter hands back its own list.
+      List<String> strMatches;
+      List<Double> doubleMatches;
+      doubleMatches = new ArrayList<Double>(
+              hmm.getNode(node).getMatchEmissions());
+      convertListToLogSpace(doubleMatches);
+      strMatches = doubleListToStringList(doubleMatches, 5);
+      matchLine += addData(10, 9, strMatches);
+
+
+      // node 0 carries no per-column annotations
+      if (node != 0)
+      {
+        matchLine += SPACE + hmm.getNodeAlignmentColumn(node);
+        matchLine += SPACE + hmm.getConsensusResidue(node);
+        matchLine += SPACE + hmm.getReferenceAnnotation(node);
+        matchLine += SPACE + hmm.getMaskedValue(node);
+        matchLine += SPACE + hmm.getConsensusStructure(node);
+
+      }
+
+      file.append(matchLine + NEW_LINE);
+      
+      String insertLine = EMPTY;
+      List<String> strInserts;
+      List<Double> doubleInserts;
+      doubleInserts = new ArrayList<Double>(
+              hmm.getNode(node).getInsertEmissions());
+      convertListToLogSpace(doubleInserts);
+      strInserts = doubleListToStringList(doubleInserts, 5);
+      insertLine += addData(17, 9, strInserts);
+
+      file.append(insertLine + NEW_LINE);
+
+      String transitionLine = EMPTY;
+      List<String> strTransitions;
+      List<Double> doubleTransitions;
+      doubleTransitions = new ArrayList<Double>(
+              hmm.getNode(node).getStateTransitions());
+      convertListToLogSpace(doubleTransitions);
+      strTransitions = doubleListToStringList(doubleTransitions, 5);
+      transitionLine += addData(17, 9, strTransitions);
+
+      file.append(transitionLine + NEW_LINE);
     }
+  }
+
+  /**
+   * Appends the header section to the buffer: the stored file header line
+   * followed by one line per property. NAME, LENG, ALPH and the five
+   * annotation flags are always written; the remaining properties are
+   * written only when the model returns a non-null value for them.
+   *
+   * @param file
+   *          buffer the header text is appended to
+   */
+  void appendFileProperties(StringBuilder file)
+  {
+    file.append(fileHeader + NEW_LINE);
+
+    appendProperty(file, "NAME", hmm.getName());
+    appendOptionalProperty(file, "ACC", hmm.getAccessionNumber());
+    appendOptionalProperty(file, "DESC", hmm.getDescription());
+    appendProperty(file, "LENG", hmm.getLength());
+    appendOptionalProperty(file, "MAXL", hmm.getMaxInstanceLength());
+    appendProperty(file, "ALPH", hmm.getAlphabetType());
+
+    appendFlag(file, "RF", hmm.referenceAnnotationIsActive());
+    appendFlag(file, "MM", hmm.maskValueIsActive());
+    appendFlag(file, "CONS", hmm.consensusResidueIsActive());
+    appendFlag(file, "CS", hmm.consensusStructureIsActive());
+    appendFlag(file, "MAP", hmm.mapIsActive());
+
+    appendOptionalProperty(file, "DATE", hmm.getDate());
+    appendOptionalProperty(file, "NSEQ", hmm.getNumberOfSequences());
+    appendOptionalProperty(file, "EFFN",
+            hmm.getEffectiveNumberOfSequences());
+    appendOptionalProperty(file, "CKSUM", hmm.getCheckSum());
+    appendOptionalProperty(file, "GA", hmm.getGatheringThreshold());
+    appendOptionalProperty(file, "TC", hmm.getTrustedCutoff());
+    appendOptionalProperty(file, "NC", hmm.getNoiseCutoff());
+
+    // the three STATS lines are written together, keyed on the MSV entry
+    if (hmm.getMSV() != null)
+    {
+      appendStatistic(file, "STATS LOCAL MSV", hmm.getMSV());
+      appendStatistic(file, "STATS LOCAL VITERBI", hmm.getViterbi());
+      appendStatistic(file, "STATS LOCAL FORWARD", hmm.getForward());
+    }
+  }
+
+  /**
+   * Appends one "KEY value" line, with the key left-justified in five
+   * columns. A null value is written as the text "null".
+   */
+  private void appendProperty(StringBuilder file, String key, Object value)
+  {
+    file.append(String.format("%-5s %1s", key, value) + NEW_LINE);
+  }
+
+  /**
+   * Appends one "KEY value" line unless the value is null, in which case
+   * nothing is written.
+   */
+  private void appendOptionalProperty(StringBuilder file, String key,
+          Object value)
+  {
+    if (value != null)
+    {
+      appendProperty(file, key, value);
+    }
+  }
+
+  /**
+   * Appends one annotation flag line, using the text that
+   * HiddenMarkovModel.findStringFromBoolean returns for the status.
+   */
+  private void appendFlag(StringBuilder file, String key, boolean status)
+  {
+    appendProperty(file, key,
+            HiddenMarkovModel.findStringFromBoolean(status));
+  }
+
+  /**
+   * Appends one STATS line: the label left-justified in 19 columns and the
+   * value right-justified in 18 columns.
+   */
+  private void appendStatistic(StringBuilder file, String label,
+          Object value)
+  {
+    file.append(String.format("%-19s %18s", label, value) + NEW_LINE);
+  }
+
+
+
+  /**
+   * Returns the first character of the given string.
+   *
+   * @param string
+   *          a string holding at least one character
+   * @return the character at index 0
+   */
+  char charValue(String string)
+  {
+    return string.charAt(0);
+  }
+
+  @Override
+  public String print(SequenceI[] seqs, boolean jvsuffix)
+  {
+
+    return null;
+  }
+
+  /**
+   * Replaces every element of the list, in place, with the negative of its
+   * natural logarithm.
+   *
+   * @param list
+   *          values to convert; the list itself is modified
+   */
+  void convertListToLogSpace(List<Double> list)
+  {
+    int position = 0;
+    while (position < list.size())
+    {
+      double probability = list.get(position);
+      list.set(position, -1 * Math.log(probability));
+      position++;
+    }
+
 
-    return string;
   }
 }