/*
 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
 * Copyright (C) 2014 The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.io;

import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Reader and writer for alignments in MEGA format.
 * 
 * Two flavours of input are recognised: a 'simple' format whose header lines
 * look like <code>Title: xyz</code>, and a 'fancy' format whose header lines
 * start with an exclamation mark (<code>!Title xyz</code>). Sequence data may
 * be interleaved (<code>#id DATA</code> on every line, in repeated blocks) or
 * noninterleaved (a <code>#id</code> line followed by lines of data); a mix of
 * the two in one file is rejected.
 */
public class MegaFile extends AlignFile
{
  /*
   * Simple file format as at
   * http://www.hiv.lanl.gov/content/sequence/HelpDocs/SEQsamples.html
   * 
   * Fancy file format as at
   * http://primerdigital.com/fastpcr/images/Drosophila_Adh.txt
   */
  public enum FileFormat
  {
    SIMPLE, FANCY
  }

  private static final String HASHSIGN = "#"; // TODO: public constants file

  private static final String COLON = ":";

  private static final String BANG = "!";

  private static final String EQUALS = "=";

  private static final String MEGA_ID = HASHSIGN + "MEGA";

  public static final String PROP_TITLE = "TITLE";

  public static final String PROP_FORMAT = "Format";

  public static final String PROP_DESCRIPTION = "Description";

  public static final String PROP_GENE = "Gene";

  public static final String PROP_INTERLEAVED = "Interleaved";

  // initial size for sequence data buffer
  private static final int SEQBUFFERSIZE = 256;

  private static final String SPACE = " ";

  // number of sequence positions written per output line
  private static final int POSITIONS_PER_LINE = 50;

  // this can be True, False or null (meaning we don't know yet)
  private Boolean interleaved;

  // set once we have seen one block of interleaved data
  private boolean firstDataBlockRead = false;

  // null until determined by parsing (or set explicitly)
  private FileFormat fileFormat;

  /**
   * Default constructor; creates an instance without reading any input.
   */
  public MegaFile()
  {
  }

  /**
   * Constructor that delegates to the superclass constructor with the given
   * arguments.
   * 
   * @param inFile
   * @param type
   * @throws IOException
   */
  public MegaFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }

  /**
   * Constructor that delegates to the superclass constructor with the given
   * source.
   * 
   * @param source
   * @throws IOException
   */
  public MegaFile(FileParse source) throws IOException
  {
    super(source);
  }

  /**
   * Parse the input stream: header lines first, then sequence data lines until
   * end of input, then convert the accumulated data to sequences.
   * 
   * @throws IOException
   *           if the data is malformed (for example a mix of interleaved and
   *           noninterleaved lines, or data with no sequence id)
   */
  @Override
  public void parse() throws IOException
  {
    /*
     * Read MEGA and Title/Format/Description/Gene headers if present. These are
     * saved as alignment properties. Returns the first sequence data line
     */
    String dataLine = parseHeaderLines();

    /*
     * If we didn't positively identify as 'fancy format', assume 'simple
     * format'
     */
    if (this.fileFormat == null)
    {
      setFileFormat(FileFormat.SIMPLE);
    }

    /*
     * Temporary store of {sequenceId, positionData} appended to while parsing.
     * A LinkedHashMap so that sequences keep their order of first appearance.
     */
    Map<String, StringBuilder> seqData = new LinkedHashMap<String, StringBuilder>();

    /*
     * The id of the sequence being read (for non-interleaved). Null until a
     * sequence id has been seen, so that data with no id context is detected.
     */
    String currentId = null;

    while (dataLine != null)
    {
      dataLine = dataLine.trim();
      if (dataLine.length() > 0)
      {
        currentId = parseDataLine(dataLine, seqData, currentId);
      }
      else if (!seqData.isEmpty())
      {
        /*
         * Blank line after processing some data...
         */
        this.firstDataBlockRead = true;
      }
      dataLine = nextLine();
    }

    setSequences(seqData);
  }

  /**
   * Convert the parsed sequence strings to objects and store them in the model,
   * in the iteration order of the supplied map.
   * 
   * @param seqData
   *          map of sequence id to sequence characters
   */
  protected void setSequences(Map<String, StringBuilder> seqData)
  {
    for (Entry<String, StringBuilder> dataset : seqData.entrySet())
    {
      SequenceI s = new Sequence(dataset.getKey(), dataset.getValue()
              .toString());
      this.seqs.addElement(s);
    }
  }

  /**
   * Process one line of sequence data. If it has no sequence identifier, append
   * to the current id's sequence. Else parse out the sequence id and append the
   * data (if any) to that id's sequence. Returns the sequence id (implicit or
   * explicit) for this line.
   * 
   * @param dataLine
   *          a non-blank, trimmed line of input
   * @param seqData
   *          map of sequence id to data read so far (updated by this method)
   * @param currentId
   *          the id of the sequence most recently named, or null if none yet
   * @return the sequence id that applies after this line
   * @throws IOException
   *           if the line is inconsistent with what has been read so far
   */
  protected String parseDataLine(String dataLine,
          Map<String, StringBuilder> seqData, String currentId)
          throws IOException
  {
    String seqId = getSequenceId(dataLine);
    if (seqId == null)
    {
      /*
       * Just character data
       */
      parseNoninterleavedDataLine(dataLine, seqData, currentId);
      return currentId;
    }

    if ((HASHSIGN + seqId).equals(dataLine.trim()))
    {
      /*
       * Sequence id only - header line for noninterleaved data
       */
      return seqId;
    }

    /*
     * Sequence id followed by data
     */
    parseInterleavedDataLine(dataLine, seqData, seqId);
    return seqId;
  }

  /**
   * Add a line of sequence data to the buffer for the given sequence id. Start
   * a new one if we haven't seen it before.
   * 
   * @param dataLine
   *          the data to append (appended exactly as supplied)
   * @param seqData
   *          map of sequence id to data read so far (updated by this method)
   * @param currentId
   *          the id of the sequence the data belongs to
   * @throws IOException
   *           if there is no current sequence id (null or empty), or if
   *           interleaved data has already been read
   */
  protected void parseNoninterleavedDataLine(String dataLine,
          Map<String, StringBuilder> seqData, String currentId)
          throws IOException
  {
    if (currentId == null || currentId.length() == 0)
    {
      /*
       * Oops. Data but no sequence id context.
       */
      throw new IOException("No sequence id context at: " + dataLine);
    }

    assertInterleaved(false, dataLine);

    /*
     * Add the current line of data to the sequence.
     */
    getSequenceDataBuffer(seqData, currentId).append(dataLine);
  }

  /**
   * Get the sequence data for this sequence id, starting a new one if
   * necessary.
   * 
   * @param seqData
   *          map of sequence id to data read so far
   * @param currentId
   *          the sequence id to look up
   * @return the (possibly newly created and registered) buffer for the id
   */
  protected StringBuilder getSequenceDataBuffer(
          Map<String, StringBuilder> seqData, String currentId)
  {
    StringBuilder sb = seqData.get(currentId);
    if (sb == null)
    {
      // first data met for this sequence id, start a new buffer
      sb = new StringBuilder(SEQBUFFERSIZE);
      seqData.put(currentId, sb);
    }
    return sb;
  }

  /**
   * Parse one line of interleaved data e.g.
   * 
   * <pre>
   * #TheSeqId CGATCGCATGCA
   * </pre>
   * 
   * Any whitespace (spaces or tabs) between the id and the data, or within the
   * data (as in the 'fancy' file format), is discarded.
   * 
   * @param dataLine
   *          the whole line, starting with '#' and the sequence id
   * @param seqData
   *          map of sequence id to data read so far (updated by this method)
   * @param seqId
   *          the sequence id already extracted from the line
   * @throws IOException
   *           if a new sequence id appears after the first data block, or if
   *           noninterleaved data has already been read
   */
  protected void parseInterleavedDataLine(String dataLine,
          Map<String, StringBuilder> seqData, String seqId)
          throws IOException
  {
    /*
     * New sequence found in second or later data block - error.
     */
    if (this.firstDataBlockRead && !seqData.containsKey(seqId))
    {
      throw new IOException(
              "Parse error: misplaced new sequence starting at " + dataLine);
    }

    StringBuilder sb = getSequenceDataBuffer(seqData, seqId);

    /*
     * Everything after "#" + seqId is data, once whitespace is removed.
     */
    String data = removeWhitespace(dataLine.substring(HASHSIGN.length()
            + seqId.length()));

    /*
     * Do nothing if this line is _only_ a sequence id with no data following.
     */
    if (data.length() > 0)
    {
      // validate before modifying the buffer
      assertInterleaved(true, dataLine);
      sb.append(data);
    }
  }

  /**
   * Returns a copy of the string with all whitespace characters (as defined by
   * Character.isWhitespace) removed.
   * 
   * @param s
   *          a non-null string
   * @return
   */
  private static String removeWhitespace(String s)
  {
    StringBuilder stripped = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); i++)
    {
      char c = s.charAt(i);
      if (!Character.isWhitespace(c))
      {
        stripped.append(c);
      }
    }
    return stripped.toString();
  }

  /**
   * If the line begins with (e.g.) "#abcde " then returns "abcde" as the
   * identifier. Else returns null. The identifier ends at the first whitespace
   * character (space, tab etc) or at the end of the line.
   * 
   * @param dataLine
   * @return the sequence id (possibly empty), or null if the line is null or
   *         does not start with '#'
   */
  public static String getSequenceId(String dataLine)
  {
    // TODO refactor to a StringUtils type class
    if (dataLine == null || !dataLine.startsWith(HASHSIGN))
    {
      return null;
    }

    int idStart = HASHSIGN.length();
    int idEnd = idStart;
    while (idEnd < dataLine.length()
            && !Character.isWhitespace(dataLine.charAt(idEnd)))
    {
      idEnd++;
    }
    return dataLine.substring(idStart, idEnd);
  }

  /**
   * Read the #MEGA and Title/Format/Description/Gene header lines (if present).
   * 
   * Save as alignment properties in case useful. Any line starting with '!'
   * marks the file as 'fancy' format. Header keywords are matched without
   * regard to case.
   * 
   * @return the first non-blank line that is not a header line (trimmed), or
   *         null if the end of input was reached
   * @throws IOException
   */
  protected String parseHeaderLines() throws IOException
  {
    final String descriptionFlag = (BANG + PROP_DESCRIPTION)
            .toUpperCase(Locale.ROOT);

    String inputLine = null;
    while ((inputLine = nextLine()) != null)
    {
      inputLine = inputLine.trim();

      /*
       * skip blank lines
       */
      if (inputLine.length() == 0)
      {
        continue;
      }

      if (inputLine.startsWith(BANG))
      {
        setFileFormat(FileFormat.FANCY);
      }

      // locale-independent upper case, so matching works in any locale
      String upperLine = inputLine.toUpperCase(Locale.ROOT);

      if (upperLine.startsWith(descriptionFlag))
      {
        /*
         * 'Fancy' description: text is on the following lines
         */
        parseDescriptionLines();
      }
      else if (isPropertyLine(inputLine))
      {
        /*
         * If a property is matched, parse and save it.
         */
        String[] propertyValue = parsePropertyValue(inputLine);
        setAlignmentProperty(propertyValue[0], propertyValue[1]);
      }
      else if (!upperLine.startsWith(MEGA_ID))
      {
        /*
         * Return the first 'data line' i.e. one that is not blank, #MEGA or
         * TITLE:
         */
        break;
      }
    }
    return inputLine;
  }

  /**
   * Read following lines until blank, appending each to the Description
   * property value.
   * 
   * Assumes the !Description line itself does not include description text.
   * 
   * Assumes the description is followed by a blank line (else we will consume
   * one too many).
   * 
   * @throws IOException
   */
  protected void parseDescriptionLines() throws IOException
  {
    StringBuilder desc = new StringBuilder(256);
    String line = nextLine();
    while (line != null && line.trim().length() > 0)
    {
      desc.append(line).append(newline);
      line = nextLine();
    }
    setAlignmentProperty(PROP_DESCRIPTION, desc.toString());
  }

  /**
   * Test whether the line holds an expected property declaration (Title,
   * Format, Description or Gene, with optional '!' prefix and ':' suffix).
   * 
   * @param inputLine
   * @return
   */
  protected boolean isPropertyLine(String inputLine)
  {
    return lineMatchesFlag(inputLine, PROP_TITLE, BANG, COLON)
            || lineMatchesFlag(inputLine, PROP_FORMAT, BANG, COLON)
            || lineMatchesFlag(inputLine, PROP_DESCRIPTION, BANG, COLON)
            || lineMatchesFlag(inputLine, PROP_GENE, BANG, COLON);
  }

  /**
   * Helper method that extract the name and value of a property, assuming the
   * first space or equals sign is the separator.
   * 
   * Thus "Description: Melanogaster" or "!Description=Melanogaster" both return
   * {"Description", "Melanogaster"}, as does "Description = Melanogaster" (an
   * equals sign that follows a space separator is treated as part of the
   * separator). The value is returned without leading or trailing whitespace.
   * 
   * A leading '!' and a trailing ':' are removed from the property name.
   * 
   * Returns an empty value string if no space or equals sign is present, and
   * {"", ""} if the argument is null.
   * 
   * @param s
   * @return a two element array holding the property name and value
   */
  public static String[] parsePropertyValue(String s)
  {
    // TODO refactor to a string utils helper class (or find equivalent)
    if (s == null)
    {
      return new String[]
      { "", "" };
    }

    String propertyName = s;
    String value = "";

    /*
     * the separator is whichever of space / equals sign comes first
     */
    int spacePos = s.indexOf(SPACE);
    int eqPos = s.indexOf(EQUALS);
    int separatorPos;
    if (spacePos == -1)
    {
      separatorPos = eqPos;
    }
    else if (eqPos == -1)
    {
      separatorPos = spacePos;
    }
    else
    {
      separatorPos = Math.min(spacePos, eqPos);
    }

    if (separatorPos > -1)
    {
      propertyName = s.substring(0, separatorPos);
      value = s.substring(separatorPos + 1).trim();
      if (separatorPos == spacePos && value.startsWith(EQUALS))
      {
        /*
         * "name = value": the equals sign is separator, not value
         */
        value = value.substring(EQUALS.length()).trim();
      }
    }

    /*
     * finally strip any leading / trailing chars from property name
     */
    if (propertyName.startsWith(BANG))
    {
      propertyName = propertyName.substring(BANG.length());
    }
    if (propertyName.endsWith(COLON))
    {
      propertyName = propertyName.substring(0, propertyName.length()
              - COLON.length());
    }

    return new String[]
    { propertyName, value };
  }

  /**
   * Test whether a line starts with the specified flag field followed by a
   * space, an equals sign, the suffix, or nothing.
   * 
   * Here we accept an optional prefix and suffix on the flag, and the check is
   * not case-sensitive (and does not depend on the default locale). So these
   * would match for "Title" with prefix "!" and suffix ":"
   * 
   * <pre>
   * Title Melanogaster
   * Title=Melanogaster
   * TITLE Melanogaster
   * TITLE=Melanogaster
   * !Title Melanogaster
   * !Title=Melanogaster
   * !TITLE Melanogaster
   * !TITLE=Melanogaster
   * Title: Melanogaster
   * Title:=Melanogaster
   * TITLE: Melanogaster
   * TITLE:=Melanogaster
   * !Title: Melanogaster
   * !Title:=Melanogaster
   * !TITLE: Melanogaster
   * !TITLE:=Melanogaster
   * Title
   * TITLE
   * !Title
   * !TITLE
   * </pre>
   * 
   * A null or empty prefix or suffix means 'no prefix' or 'no suffix'.
   * 
   * @param line
   *          the line to test (leading and trailing whitespace is ignored)
   * @param flag
   *          the keyword to look for
   * @param prefix
   *          optional text that may precede the flag
   * @param suffix
   *          optional text that may follow the flag
   * @return true if the line matches, false if not or if line or flag is null
   */
  public static boolean lineMatchesFlag(String line, String flag,
          String prefix, String suffix)
  {
    // TODO refactor to a string utils helper class
    if (line == null || flag == null)
    {
      return false;
    }

    String lineUpper = line.trim().toUpperCase(Locale.ROOT);
    String flagUpper = flag.toUpperCase(Locale.ROOT);
    String prefixUpper = prefix == null ? "" : prefix
            .toUpperCase(Locale.ROOT);
    String suffixUpper = suffix == null ? "" : suffix
            .toUpperCase(Locale.ROOT);

    // skip the whole prefix e.g. ! before attempting match
    if (prefixUpper.length() > 0 && lineUpper.startsWith(prefixUpper))
    {
      lineUpper = lineUpper.substring(prefixUpper.length());
    }

    if (!lineUpper.startsWith(flagUpper))
    {
      return false;
    }

    /*
     * the flag must be the whole line, or be followed by a space, an equals
     * sign or the suffix
     */
    String remainder = lineUpper.substring(flagUpper.length());
    return remainder.length() == 0
            || remainder.startsWith(SPACE)
            || remainder.startsWith(EQUALS)
            || (suffixUpper.length() > 0 && remainder
                    .startsWith(suffixUpper));
  }

  /**
   * Write out the alignment sequences in Mega format.
   */
  @Override
  public String print()
  {
    return print(getSeqsAsArray());
  }

  /**
   * Write out the alignment sequences in Mega format - interleaved unless
   * explicitly noninterleaved.
   * 
   * @param s
   *          the sequences to write
   * @return the formatted output
   */
  public String print(SequenceI[] s)
  {
    // TODO: is there a way to preserve the 'interleaved' property so it can
    // affect output?

    if (this.fileFormat == FileFormat.FANCY)
    {
      return printInterleavedCodons(s);
    }
    if (this.interleaved != null && !this.interleaved.booleanValue())
    {
      return printNonInterleaved(s);
    }
    return printInterleaved(s);
  }

  /**
   * Print the sequences in interleaved format, each row 15 space-separated
   * triplets.
   * 
   * @param s
   * @return
   */
  protected String printInterleavedCodons(SequenceI[] s)
  {
    // TODO not coded yet - defaulting to the 'simple' format output
    return printInterleaved(s);
  }

  /**
   * Print to string in Interleaved format - blocks of next 50 characters of
   * each sequence in turn. Sequence ids are left-justified and padded to the
   * width of the longest id.
   * 
   * @param s
   *          the sequences to write
   * @return the formatted output
   */
  protected String printInterleaved(SequenceI[] s)
  {
    int maxIdLength = getMaxIdLength(s);
    int maxSequenceLength = getMaxSequenceLength(s);
    int numLines = maxSequenceLength / POSITIONS_PER_LINE + 3; // approx

    /*
     * Size a buffer to hold the whole output
     */
    StringBuilder sb = new StringBuilder(numLines
            * (maxIdLength + 2 + POSITIONS_PER_LINE));
    printHeaders(sb, FileFormat.SIMPLE);

    /*
     * Format for "#id " padded to a common width; a width of zero is not a
     * valid format specifier so use at least 1
     */
    String idFormat = HASHSIGN + "%-" + Math.max(1, maxIdLength) + "s"
            + SPACE;

    int numDataBlocks = (maxSequenceLength - 1) / POSITIONS_PER_LINE + 1;
    for (int i = 0; i < numDataBlocks; i++)
    {
      int blockStart = i * POSITIONS_PER_LINE;
      int blockEnd = blockStart + POSITIONS_PER_LINE;

      sb.append(newline);
      for (SequenceI seq : s)
      {
        sb.append(String.format(idFormat, seq.getName()));
        sb.append(seq.getSequence(blockStart, blockEnd));
        sb.append(newline);
      }
    }

    return sb.toString();
  }

  /**
   * Append the MEGA header and any other known properties. Only properties
   * whose key and value are both Strings are written, as "!key value" for the
   * fancy format or "key: value" otherwise.
   * 
   * @param sb
   *          the buffer to append to
   * @param format
   *          the header style to use
   */
  private void printHeaders(StringBuilder sb, FileFormat format)
  {
    sb.append(MEGA_ID);
    sb.append(newline);

    Set<Entry<Object, Object>> props = getAlignmentProperties();
    if (props == null)
    {
      return;
    }

    for (Entry<Object, Object> prop : props)
    {
      Object key = prop.getKey();
      Object value = prop.getValue();
      if (key instanceof String && value instanceof String)
      {
        if (format == FileFormat.FANCY)
        {
          sb.append(BANG).append(key).append(SPACE).append(value);
        }
        else
        {
          sb.append(key).append(COLON).append(SPACE).append(value);
        }
        sb.append(newline);
      }
    }
  }

  /**
   * Get the longest sequence id (to allow aligned printout).
   * 
   * @param s
   * @return the length of the longest sequence name, or 0 if there are none
   */
  protected static int getMaxIdLength(SequenceI[] s)
  {
    // TODO pull up for reuse
    int maxLength = 0;
    for (SequenceI seq : s)
    {
      maxLength = Math.max(maxLength, seq.getName().length());
    }
    return maxLength;
  }

  /**
   * Get the longest sequence length
   * 
   * @param s
   * @return the length of the longest sequence, or 0 if there are none
   */
  protected static int getMaxSequenceLength(SequenceI[] s)
  {
    // TODO pull up for reuse
    int maxLength = 0;
    for (SequenceI seq : s)
    {
      maxLength = Math.max(maxLength, seq.getLength());
    }
    return maxLength;
  }

  /**
   * Print to string in noninterleaved format - all of each sequence in turn, in
   * blocks of 50 characters.
   * 
   * @param s
   *          the sequences to write
   * @return the formatted output
   */
  protected String printNonInterleaved(SequenceI[] s)
  {
    int maxSequenceLength = getMaxSequenceLength(s);
    // approx
    int numLines = maxSequenceLength / POSITIONS_PER_LINE + 2 + s.length;

    /*
     * Roughly size a buffer to hold the whole output
     */
    StringBuilder sb = new StringBuilder(numLines * POSITIONS_PER_LINE);
    printHeaders(sb, FileFormat.SIMPLE);

    for (SequenceI seq : s)
    {
      sb.append(newline);
      sb.append(HASHSIGN).append(seq.getName()).append(newline);

      /*
       * strictly less than the length, so that no empty line is written when
       * the length is an exact multiple of the line width (or zero)
       */
      int seqLength = seq.getLength();
      for (int startPos = 0; startPos < seqLength; startPos += POSITIONS_PER_LINE)
      {
        sb.append(seq.getSequence(startPos, startPos + POSITIONS_PER_LINE));
        sb.append(newline);
      }
    }

    return sb.toString();
  }

  /**
   * Flag this file as interleaved or not, based on data format. Throws an
   * exception if has previously been determined to be otherwise.
   * 
   * @param isIt
   *          true if the line is interleaved data, false if noninterleaved
   * @param dataLine
   *          the line being processed (used in the error message only)
   * @throws IOException
   *           if this contradicts an earlier determination
   */
  protected void assertInterleaved(boolean isIt, String dataLine)
          throws IOException
  {
    if (this.interleaved != null && isIt != this.interleaved.booleanValue())
    {
      throw new IOException(
              "Parse error: mix of interleaved and noninterleaved detected, at line: "
                      + dataLine);
    }
    this.interleaved = Boolean.valueOf(isIt);
  }

  /**
   * Answers true if the data has been determined to be interleaved, false if
   * noninterleaved or not yet known.
   * 
   * @return
   */
  public boolean isInterleaved()
  {
    return Boolean.TRUE.equals(this.interleaved);
  }

  /**
   * Returns the file format, or null if it has not been set or determined.
   * 
   * @return
   */
  public FileFormat getFileFormat()
  {
    return this.fileFormat;
  }

  /**
   * Sets the file format.
   * 
   * @param fileFormat
   */
  public void setFileFormat(FileFormat fileFormat)
  {
    this.fileFormat = fileFormat;
  }
}