import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
Map<String, Double> binned = new HashMap<>();
// location of the family file
- final static String FAMILIES = "C:/Users/TZVanaalten/Pfam-A.full";
+ final static String FAMILIES = "H:/Desktop/PFAM/Family/SeedFamilies.seed";
+
+ // location of the file containing the family-clan links
+ final static String FAMILIESTOCLAN = "H:/Desktop/PFAM/Family/Clanlinks.dat";
// location of the HMM file
final static String HMMS = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
// current directory
String currentFolder;
+ // whether raw values are loaded, collected and exported alongside the
+ // binned data; set on every call to run
+ boolean keepRaw = false;
+
/**
* Sets the working directory.
*
}
/**
- * Moves a buffered reader to a specific location in the file, delimited by
- * '//'.
+ * Moves a buffered reader forward in the file by a given number of entries.
+ * Each entry in the file is delimited by '//'.
*
* @param index
* The number of entries to move forward by.
* @param br
+ * The reader to advance.
* @throws IOException
*/
- public void moveToFile(int index, BufferedReader br) throws IOException
+ public void moveLocationBy(int index, BufferedReader br)
+ throws IOException
{
for (int i = 0; i < index; i++)
{
* The number of families to read before saving.
+ * @param keepRawData
+ * Whether raw data is read, collected and exported.
* @throws IOException
*/
- public void run(int increments) throws IOException
+ public void run(int increments, boolean keepRawData) throws IOException
{
-
+ keepRaw = keepRawData;
readPreviousData(currentFolder);
BufferedReader posReader = new BufferedReader(
new FileReader(currentFolder + "/CurrentPosition.txt"));
String line = posReader.readLine();
posReader.close();
- currentFilePosition = Integer.parseInt(line);
- BufferedReader inputSTO = new BufferedReader(
- new FileReader(FAMILIES));
- BufferedReader inputHMM = new BufferedReader(
- new FileReader(HMMS));
+ BufferedReader inputSTO = new BufferedReader(new FileReader(FAMILIES));
+ BufferedReader inputHMM = new BufferedReader(new FileReader(HMMS));
+
+ currentFilePosition = Integer.parseInt(line);
- moveToFile(currentFilePosition, inputHMM);
- moveToFile(currentFilePosition, inputSTO);
+ moveLocationBy(currentFilePosition, inputHMM);
+ moveLocationBy(currentFilePosition, inputSTO);
int filesRead = 0;
+ int i = 0;
while (filesRead < increments)
{
+
FileParse parserSTO = new FileParse(inputSTO, "",
DataSourceType.FILE);
readStockholm(parserSTO);
DataSourceType.FILE);
readHMM(parserHMM);
- if (hmm.getAlphabetType().equals("amino"))
- {
int count = countValidResidues();
processData(count);
filesRead++;
- }
+
currentFilePosition++;
+ System.out.println(i);
+ i++;
}
PrintWriter p = new PrintWriter(
- new File(currentFolder + "/CurrentPosition"));
+ new File(currentFolder + "/CurrentPosition.txt"));
p.print(currentFilePosition);
p.close();
exportData(currentFolder);
public void readPreviousData(String source) throws IOException
{
readBinned(source);
- readRaw(source);
+ // raw data is only loaded when the current run keeps raw values
+ if (keepRaw)
+ {
+ readRaw(source);
+ }
}
/**
BufferedReader input = new BufferedReader(
new FileReader(source + BINNED));
String line = input.readLine();
+ binned = new HashMap<>();
while (!("".equals(line) || line == null))
{
- binned = new HashMap<>();
Scanner scanner = new Scanner(line);
scanner.useDelimiter(",");
binned.put(scanner.next(), scanner.nextDouble());
*/
public void processData(int count)
{
+ int rawPos = 0;
+ if (keepRaw)
+ {
+ raw.add(new ArrayList<Double>());
+ rawPos = raw.size() - 1;
+ }
- raw.add(new ArrayList<Double>());
- int rawPos = raw.size() - 1;
for (int width = 0; width < sequences.size(); width++)
{
for (int length = 1; length < hmm.getLength(); length++)
char symbol;
int alignPos;
alignPos = hmm.getNodeAlignmentColumn(length);
-
+
symbol = sequences.get(width).getCharAt(alignPos);
if (ResidueProperties.aminoBackgroundFrequencies
.containsKey(symbol))
{
-
Double prob;
Float bfreq;
Double llr;
prob = hmm.getMatchEmissionProbability(alignPos, symbol);
bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
llr = Math.log(prob / bfreq);
- raw.get(rawPos).add(llr);
+ if (keepRaw)
+ {
+ raw.get(rawPos).add(llr);
+ }
+
String output;
output = String.format("%.1f", llr);
if ("-0.0".equals(output))
public void readStockholm(FileParse source) throws IOException
{
StockholmFile file = new StockholmFile(source);
- file.parse();
+ // NOTE(review): no explicit parse() call is made here; presumably the
+ // StockholmFile constructor already parses the source — confirm
sequences = file.getSeqs();
}
writerBin.println(entry.getKey() + "," + entry.getValue());
}
writerBin.close();
+ if (keepRaw)
+ {
PrintWriter writerRaw = new PrintWriter(new File(location + RAW));
-
+
StringBuilder identifier = new StringBuilder();
-
+
for (int i = 1; i < raw.size() + 1; i++)
{
identifier.append("Fam " + i + ",");
}
-
+
writerRaw.println(identifier);
-
+
boolean rowIsEmpty = false;
int row = 0;
while (!rowIsEmpty)
}
writerRaw.close();
+ }
+
}
/**
{
BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
- moveToFile(index, br);
+ moveLocationBy(index, br);
String line = br.readLine();
{
BufferedReader br = new BufferedReader(new FileReader(HMMS));
- moveToFile(index, br);
+ moveLocationBy(index, br);
String line = br.readLine();
}
/**
- * Prints the specified family to a .sto file in the current directory.
+ * Appends the specified family to the file at the given location.
*
* @param index
+ * @param location
* @throws IOException
*/
- public void printFamToFile(int index) throws IOException
+ public void exportFam(int index, String location) throws IOException
{
- String name;
-
- BufferedReader nameFinder = new BufferedReader(
- new FileReader(FAMILIES));
-
- moveToFile(index, nameFinder);
-
- nameFinder.readLine();
-
- Scanner scanner = new Scanner(nameFinder.readLine());
- scanner.next();
- scanner.next();
- name = scanner.next();
- scanner.close();
-
BufferedReader br = new BufferedReader(new FileReader(FAMILIES));
- moveToFile(index, br);
+ moveLocationBy(index, br);
String line = br.readLine();
PrintWriter writer = new PrintWriter(
- currentFolder + "/" + name + ".sto");
+ new FileOutputStream(new File(location), true));
while (!"//".equals(line))
{
writer.println(line);
}
- /**
- * Prints the specified family to a .hmm file in the current directory.
- *
- * @param index
- * @throws IOException
- */
- public void printHMMToFile(int index) throws IOException
+ public void exportFile(BufferedReader br, String location)
+ throws IOException
{
+ String line = br.readLine();
+ PrintWriter writer = new PrintWriter(
+ new FileOutputStream(new File(location), true));
+ while (!"//".equals(line))
+ {
+ writer.println(line);
+ line = br.readLine();
+ }
+ writer.println(line);
+ writer.close();
+
+
+ }
+ /**
+ * Returns the second whitespace-delimited token on the second line of the
+ * entry reached after moving forward by index entries in the HMM file
+ * (presumably the HMM's name line — confirm against the file format).
+ *
+ * @param index
+ * The number of entries to skip before reading.
+ * @return the token read from the entry's second line
+ * @throws IOException
+ */
+ public String getHMMName(int index) throws IOException
+ {
String name;
BufferedReader nameFinder = new BufferedReader(new FileReader(HMMS));
- moveToFile(index, nameFinder);
+ moveLocationBy(index, nameFinder);
+
+ // the first line of the entry is discarded
+ nameFinder.readLine();
+
+ Scanner scanner = new Scanner(nameFinder.readLine());
+ // the first token is overwritten; the second token is the one returned
+ name = scanner.next();
+ name = scanner.next();
+ scanner.close();
+ // NOTE(review): nameFinder is never closed — looks like a reader leak
+ return name;
+ }
+
+ /**
+ * Returns the third whitespace-delimited token on the second line of the
+ * entry reached after moving forward by index entries in the family file
+ * (presumably the family's identifier line — confirm against the file
+ * format).
+ *
+ * @param index
+ * The number of entries to skip before reading.
+ * @return the token read from the entry's second line
+ * @throws IOException
+ */
+ public String getFamilyName(int index) throws IOException
+ {
+ String name;
+
+ BufferedReader nameFinder = new BufferedReader(
+ new FileReader(FAMILIES));
+
+ moveLocationBy(index, nameFinder);
// the first line of the entry is discarded
nameFinder.readLine();
Scanner scanner = new Scanner(nameFinder.readLine());
// the first two tokens are overwritten; the third token is the one returned
name = scanner.next();
name = scanner.next();
+ name = scanner.next();
scanner.close();
+ // NOTE(review): nameFinder is never closed — looks like a reader leak
+ return name;
+ }
+
+ /**
+ * Appends the specified HMM to the file at the given location.
+ *
+ * @param index
+ * @param location
+ * @throws IOException
+ */
+ public void exportHMM(int index, String location) throws IOException
+ {
+
BufferedReader br = new BufferedReader(new FileReader(HMMS));
- moveToFile(index, br);
+ moveLocationBy(index, br);
String line = br.readLine();
PrintWriter writer = new PrintWriter(
- currentFolder + "/" + name + ".hmm");
+ new FileOutputStream(new File(location), true));
while (!"//".equals(line))
{
writer.println(line);
raw.close();
}
-}
+ public void sortIntoClans(String directory) throws IOException
+ {
+ BufferedReader clanFinder = new BufferedReader(new FileReader(FAMILIESTOCLAN));
+ BufferedReader familyReader = new BufferedReader(
+ new FileReader(FAMILIES));
+ BufferedReader hmmReader = new BufferedReader(new FileReader(HMMS));
+ HashMap<String, Integer> clanIndexes = new HashMap<>();
+ int filePos = 0;
+ int clanCount = 0;
+ String line;
+ line = clanFinder.readLine();
+
+ while (!"".equals(line) && !" ".equals(line) && line != null)
+ {
+ String clanName;
+ boolean inClan = false;
+ while (!(line.indexOf("//") > -1))
+ {
+
+ if (line.indexOf("#=GF CL") > -1)
+ {
+ inClan = true;
+ Scanner scanner = new Scanner(line);
+ scanner.next();
+ scanner.next();
+ clanName = scanner.next();
+ scanner.close();
+
+ if (!clanIndexes.containsKey(clanName))
+ {
+ clanIndexes.put(clanName, clanCount);
+ clanCount++;
+ }
+
+ Integer clanI = clanIndexes.get(clanName);
+ String clanPath = directory + "/Clan" + clanI.toString();
+ File clanFolder = new File(clanPath);
+ String famPath = clanPath + "/Families.sto";
+ String hmmPath = clanPath + "/HMMs.hmm";
+ if (!clanFolder.exists())
+ {
+ clanFolder.mkdir();
+ }
+ exportFile(familyReader, famPath);
+ exportFile(hmmReader, hmmPath);
+
+ }
+ line = clanFinder.readLine();
+ }
+ if (!inClan)
+ {
+ moveLocationBy(1, familyReader);
+ moveLocationBy(1, hmmReader);
+ }
+ filePos++;
+ System.out.println(filePos + " files read.");
+ line = clanFinder.readLine();
+
+ }
+ clanFinder.close();
+
+ }
+
+ }
+
+
// prints family to console. Syntax is printFam <index>
if (command.indexOf("printFam") > -1)
{
- inputScanner.next();
- int index = inputScanner.nextInt();
- analyser.printFam(index);
- continue;
+ try
+ {
+ inputScanner.next();
+ int index = inputScanner.nextInt();
+ analyser.printFam(index);
+ continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
+
}
// prints HMM to console. Syntax is printHMM <index>
if (command.indexOf("printHMM") > -1)
{
-
+ try
+ {
inputScanner.next();
int index = inputScanner.nextInt();
analyser.printHMM(index);
continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
}
// appends family to a file. Syntax is exportFam <index> <location>.
if (command.indexOf("exportFam") > -1)
{
-
+ try
+ {
inputScanner.next();
int index = inputScanner.nextInt();
- String location = inputScanner.next();
- analyser.printFamToFile(index);
+ String location = inputScanner.next();
+ analyser.exportFam(index, location);
continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
}
// appends HMM to a file. Syntax is exportHMM <index> <location>.
if (command.indexOf("exportHMM") > -1)
{
-
+ try
+ {
inputScanner.next();
int index = inputScanner.nextInt();
- String location = inputScanner.next();
- analyser.printHMMToFile(index);
+ String location = inputScanner.next();
+ analyser.exportHMM(index, location);
continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
}
// Processes data. Syntax is run <number of loops> <increments> <keepRaw>.
// The number of loops is how many times a batch is processed and saved.
// increments is the number of families read per 'save'. keepRaw (true or
// false) is whether raw data is kept.
if (command.indexOf("run") > -1)
{
+ try
+ {
+
inputScanner.next();
int loops = inputScanner.nextInt();
int increments = inputScanner.nextInt();
+ boolean keepRaw = inputScanner.nextBoolean();
for (int i = 0; i < loops; i++)
{
- analyser.run(increments);
+ analyser.run(increments, keepRaw);
+ System.out.println("Saved");
}
+ System.out.println("Task completed");
continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
}
// terminates program. Syntax is terminate.
if (command.indexOf("terminate") > -1)
// changes current directory. Syntax is cd <directory>
if (command.indexOf("cd") > -1)
{
+ try
+ {
inputScanner.next();
analyser.setFolder(inputScanner.next());
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
+ }
+
+ if (command.indexOf("getFamName") > -1)
+ {
+ try
+ {
+ inputScanner.next();
+ System.out.println(analyser.getFamilyName(inputScanner.nextInt()));
+ inputScanner.close();
+ continue;
+ } catch (Exception e)
+ {
+ System.out.println("Command failed");
+ }
+ }
+ if (command.indexOf("sortIntoClans") > -1)
+ {
+ inputScanner.next();
+ analyser.sortIntoClans(inputScanner.next());
+ continue;
+
}
- inputScanner.close();
- continue;
}