/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.util.Comparison;
import jalview.util.Format;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
/**
* DOCUMENT ME!
*
* @author $author$
* @version $Revision$
*/
public class MSFfile extends AlignFile
{
/**
* Creates a new MSFfile object.
*/
public MSFfile()
{
}
/**
* Creates a new MSFfile object.
*
* @param inFile
* DOCUMENT ME!
* @param type
* DOCUMENT ME!
*
* @throws IOException
* DOCUMENT ME!
*/
public MSFfile(String inFile, DataSourceType type) throws IOException
{
super(inFile, type);
}
public MSFfile(FileParse source) throws IOException
{
super(source);
}
/**
* Read and parse MSF sequence data
*/
@Override
public void parse() throws IOException
{
boolean seqFlag = false;
List headers = new ArrayList();
Hashtable seqhash = new Hashtable();
try
{
String line;
while ((line = nextLine()) != null)
{
StringTokenizer str = new StringTokenizer(line);
String key = null;
while (str.hasMoreTokens())
{
String inStr = str.nextToken();
// If line has header information add to the headers vector
if (inStr.indexOf("Name:") != -1)
{
key = str.nextToken();
headers.add(key);
}
// if line has // set SeqFlag so we know sequences are coming
if (inStr.indexOf("//") != -1)
{
seqFlag = true;
}
// Process lines as sequence lines if seqFlag is set
if ((inStr.indexOf("//") == -1) && seqFlag)
{
// sequence id is the first field
key = inStr;
StringBuilder tempseq;
// Get sequence from hash if it exists
if (seqhash.containsKey(key))
{
tempseq = seqhash.get(key);
}
else
{
tempseq = new StringBuilder(64);
seqhash.put(key, tempseq);
}
// loop through the rest of the words
while (str.hasMoreTokens())
{
// append the word to the sequence
String sequenceBlock = str.nextToken();
tempseq.append(sequenceBlock);
}
}
}
}
} catch (IOException e)
{
System.err.println("Exception parsing MSFFile " + e);
e.printStackTrace();
}
this.noSeqs = headers.size();
// Add sequences to the hash
for (int i = 0; i < headers.size(); i++)
{
if (seqhash.get(headers.get(i)) != null)
{
String head = headers.get(i);
String seq = seqhash.get(head).toString();
if (maxLength < head.length())
{
maxLength = head.length();
}
/*
* replace ~ (leading/trailing positions) with the gap character;
* use '.' as this is the internal gap character required by MSF
*/
seq = seq.replace('~', '.');
Sequence newSeq = parseId(head);
newSeq.setSequence(seq);
seqs.addElement(newSeq);
}
else
{
System.err.println("MSFFile Parser: Can't find sequence for "
+ headers.get(i));
}
}
}
/**
* DOCUMENT ME!
*
* @param seq
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
public int checkSum(String seq)
{
int check = 0;
String sequence = seq.toUpperCase(Locale.ROOT);
for (int i = 0; i < sequence.length(); i++)
{
try
{
int value = sequence.charAt(i);
if (value != -1)
{
check += (i % 57 + 1) * value;
}
} catch (Exception e)
{
System.err.println("Exception during MSF Checksum calculation");
e.printStackTrace();
}
}
return check % 10000;
}
/**
* DOCUMENT ME!
*
* @param s
* DOCUMENT ME!
* @param is_NA
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
@Override
public String print(SequenceI[] sqs, boolean jvSuffix)
{
boolean is_NA = Comparison.isNucleotide(sqs);
SequenceI[] s = new SequenceI[sqs.length];
StringBuilder out = new StringBuilder(256);
out.append("!!").append(is_NA ? "NA" : "AA")
.append("_MULTIPLE_ALIGNMENT 1.0");
// TODO: JBPNote : Jalview doesn't remember NA or AA yet.
out.append(newline);
out.append(newline);
int max = 0;
int maxid = 0;
int i = 0;
while ((i < sqs.length) && (sqs[i] != null))
{
/*
* modify to MSF format: uses '.' for internal gaps,
* and '~' for leading or trailing gaps
*/
String seqString = sqs[i].getSequenceAsString().replace('-', '.');
StringBuilder sb = new StringBuilder(seqString);
for (int ii = 0; ii < sb.length(); ii++)
{
if (sb.charAt(ii) == '.')
{
sb.setCharAt(ii, '~');
}
else
{
break;
}
}
for (int ii = sb.length() - 1; ii > 0; ii--)
{
if (sb.charAt(ii) == '.')
{
sb.setCharAt(ii, '~');
}
else
{
break;
}
}
s[i] = new Sequence(sqs[i].getName(), sb.toString(),
sqs[i].getStart(), sqs[i].getEnd());
if (sb.length() > max)
{
max = sb.length();
}
i++;
}
Format maxLenpad = new Format(
"%" + (new String("" + max)).length() + "d");
Format maxChkpad = new Format(
"%" + (new String("1" + max)).length() + "d");
i = 0;
int bigChecksum = 0;
int[] checksums = new int[s.length];
while (i < s.length)
{
checksums[i] = checkSum(s[i].getSequenceAsString());
bigChecksum += checksums[i];
i++;
}
long maxNB = 0;
out.append(" MSF: " + s[0].getLength() + " Type: "
+ (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000)
+ " ..");
out.append(newline);
out.append(newline);
out.append(newline);
String[] nameBlock = new String[s.length];
String[] idBlock = new String[s.length];
i = 0;
while ((i < s.length) && (s[i] != null))
{
nameBlock[i] = new String(" Name: " + printId(s[i], jvSuffix) + " ");
idBlock[i] = new String("Len: " + maxLenpad.form(s[i].getLength())
+ " Check: " + maxChkpad.form(checksums[i])
+ " Weight: 1.00" + newline);
if (s[i].getName().length() > maxid)
{
maxid = s[i].getName().length();
}
if (nameBlock[i].length() > maxNB)
{
maxNB = nameBlock[i].length();
}
i++;
}
if (maxid < 10)
{
maxid = 10;
}
if (maxNB < 15)
{
maxNB = 15;
}
Format nbFormat = new Format("%-" + maxNB + "s");
for (i = 0; (i < s.length) && (s[i] != null); i++)
{
out.append(nbFormat.form(nameBlock[i]) + idBlock[i]);
}
maxid++;
out.append(newline);
out.append(newline);
out.append("//");
out.append(newline);
out.append(newline);
int len = 50;
int nochunks = (max / len) + (max % len > 0 ? 1 : 0);
for (i = 0; i < nochunks; i++)
{
int j = 0;
while ((j < s.length) && (s[j] != null))
{
String name = printId(s[j], jvSuffix);
out.append(new Format("%-" + maxid + "s").form(name + " "));
for (int k = 0; k < 5; k++)
{
int start = (i * 50) + (k * 10);
int end = start + 10;
int length = s[j].getLength();
if ((end < length)
&& (start < length))
{
out.append(s[j].getSequence(start, end));
if (k < 4)
{
out.append(" ");
}
else
{
out.append(newline);
}
}
else
{
if (start < length)
{
out.append(s[j].getSequenceAsString().substring(start));
out.append(newline);
}
else
{
if (k == 0)
{
out.append(newline);
}
}
}
}
j++;
}
out.append(newline);
}
return out.toString();
}
}