package jalview.util;
+import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.SequenceI;
import jalview.io.DataSourceType;
*/
public class HMMProbabilityDistributionAnalyser
{
+ AlignmentAnnotation reference = null;
Vector<SequenceI> sequences;
Map<String, Double> binned = new HashMap<>();
// location of the family file
- String families = "H:/Desktop/PFAM/Family/SeedFamilies.seed";
+ String families = "/media/sf_Shared_Folder/PFAM/Family/SeedFamilies.seed";
// location of the file containing the family-clan links
- final static String FAMILIESTOCLAN = "H:/Desktop/PFAM/Family/Clanlinks.dat";
+ final static String FAMILIESTOCLAN = "/media/sf_Shared_Folder/PFAM/Family/Clanlinks.dat";
// location of the HMM file
- String hmms = "H:/Desktop/PFAM/HMMs/Pfam-A.hmm";
+ String hmms = "/media/sf_Shared_Folder/PFAM/HMMs/Pfam-A.hmm";
// suffix for raw file
final static String RAW = "/Raw.csv";
while (!"//".equals(line))
{
line = br.readLine();
+
}
}
/**
* Analyses a specified number of families and then saves the data. Before
* analysing the data, the previous saved data will be imported and after
- * analysing this, the data is exported back into the file.
+ * analysing this, the data is exported back into the file. The file must be
+ * in flat file format.
*
* @param increments
* The number of families to read before saving.
/**
* Analyses all families and then saves the data. Before analysing the data,
* the previous saved data will be imported and after analysing this, the data
- * is exported back into the file.
+ * is exported back into the file. The file must be in flat file format.
*
* @param increments
* The number of families to read before saving.
* @throws IOException
*/
- public void runToEnd(boolean keepRawData) throws IOException
+ public void runToEnd(boolean keepRawData, boolean forClans)
+ throws IOException
{
keepRaw = keepRawData;
BufferedReader inputSTO = null;
BufferedReader inputHMM = null;
int size = 0;
+ int files = 1;
try
{
- readPreviousData(currentFolder);
- BufferedReader posReader = new BufferedReader(
- new FileReader(currentFolder + "/CurrentPosition.txt"));
-
- String line = posReader.readLine();
- posReader.close();
- currentFilePosition = Integer.parseInt(line);
- readPreviousData(currentFolder);
-
- inputSTO = new BufferedReader(new FileReader(families));
- inputHMM = new BufferedReader(new FileReader(hmms));
- } catch (Exception e)
+ if (forClans)
{
- System.out.println("No or incomplete previous data found");
+ files = 603;
}
+ int filesRead = 0;
+ for (int clan = 0; clan < files; clan++)
+ {
+ System.out.println(clan);
+ String clanPath = "";
+ int numberOfFamilies = 0;
+ if (forClans)
+ {
+ clanPath = currentFolder + "/Clan" + clan;
+ if (!new File(clanPath).exists())
+ {
+ continue;
+ }
+ BufferedReader famCountReader = new BufferedReader(
+ new FileReader(clanPath + "/NumberOfFamilies.txt"));
+ numberOfFamilies = Integer.parseInt(famCountReader.readLine());
+ }
+ else
+ {
+ numberOfFamilies = 1;
+ }
+
+ for (int fam = 0; fam < numberOfFamilies; fam++)
+ {
+ if (forClans)
+ {
+ families = clanPath + "/Families/Fam" + fam + ".sto";
+ hmms = clanPath + "/HMMs/HMM" + fam + ".hmm";
+ }
-
+ inputSTO = new BufferedReader(new FileReader(families));
+ inputHMM = new BufferedReader(new FileReader(hmms));
- moveLocationBy(currentFilePosition, inputHMM);
- moveLocationBy(currentFilePosition, inputSTO);
- int filesRead = 0;
- int i = 0;
- boolean endReached = atEnd(inputSTO);
- while (!endReached)
- {
- readStockholm(inputSTO);
- readHMM(inputHMM);
+ int i = 0;
+ boolean endReached = atEnd(inputSTO);
+ while (!endReached)
+ {
+ readStockholm(inputSTO);
+ readHMM(inputHMM);
int count = countValidResidues();
processData(count);
filesRead++;
-
- currentFilePosition++;
- System.out.println(i);
- i++;
+ System.out.println(filesRead);
endReached = atEnd(inputSTO);
}
-
-
- PrintWriter p = new PrintWriter(
- new File(currentFolder + "/CurrentPosition.txt"));
- p.print(currentFilePosition);
- p.close();
- exportData(currentFolder);
- raw.clear();
- binned.clear();
-
+ }
+ }
+ } catch (Exception e)
+ {
+ e.printStackTrace();
+ } finally
+ {
+ exportData(currentFolder);
+ raw.clear();
+ binned.clear();
+ }
}
/**
raw.add(new ArrayList<Double>());
rawPos = raw.size() - 1;
}
-
+ Double total = 0d;
for (int width = 0; width < sequences.size(); width++)
{
for (int length = 1; length < hmm.getLength() + 1; length++)
Double llr;
prob = hmm.getMatchEmissionProbability(alignPos, symbol);
bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol);
+ if (prob == 0 || bfreq == 0)
+ {
+ System.out.println("error");
+ }
llr = Math.log(prob / bfreq);
if (keepRaw)
{
String output;
output = String.format("%.1f", llr);
+ total += Double.parseDouble(output);
if ("-0.0".equals(output))
{
output = "0.0";
}
}
}
+ System.out.println(total / count);
}
{
FileParse parserSTO = new FileParse(inputSTO, "", DataSourceType.FILE);
StockholmFile file = new StockholmFile(parserSTO);
+ Vector<AlignmentAnnotation> annots = file.getAnnotations();
+
+ for (AlignmentAnnotation annot : annots)
+ {
+ if (annot.label.contains("Reference"))
+ {
+ reference = annot;
+ }
+ }
sequences = file.getSeqs();
}
{
FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE);
HMMFile file = new HMMFile(parserHMM);
- file.parse();
hmm = file.getHMM();
+
}
/**
}
/**
- * Prints the specified family to a .hmm file in the current directory.
+ * Prints the specified family to a .hmm file.
*
* @param index
* @throws IOException
BufferedReader familyReader = new BufferedReader(
new FileReader(families));
BufferedReader hmmReader = new BufferedReader(new FileReader(hmms));
+ int families = 0;
+ // moveLocationBy(7000, familyReader);
+ // moveLocationBy(7000, clanFinder);
+ // moveLocationBy(7000, hmmReader);
HashMap<String, Integer> clanIndexes = new HashMap<>();
ArrayList<Integer> familyCounts = new ArrayList<>();
int filePos = 0;
if (line.indexOf("#=GF CL") > -1)
{
+ families++;
+ System.out.println(families);
inClan = true;
Scanner scanner = new Scanner(line);
scanner.next();
moveLocationBy(1, hmmReader);
}
filePos++;
- System.out.println(filePos + " files read.");
+ // System.out.println(filePos + " files read.");
line = clanFinder.readLine();
}
int alignmentsExported = 0;
for (int clan = 0; clan < 604; clan++)
{
+ System.out.println(clan);
int famCount = 0;
String clanPath = clansLocation + "/Clan" + clan;
int numberOfFamilies;
String line = br.readLine();
numberOfFamilies = Integer.parseInt(line);
br.close();
- String commandExportLocation = exportLocation + "/Clan" + clan;
+ if (numberOfFamilies == 1)
+ {
+ continue;
+ }
+ final String commandExportLocation = exportLocation + "/Clan" + clan;
createFolders(commandExportLocation);
for (int family = 0; family < numberOfFamilies; family++)
{
indexes.add(i);
}
}
+
int hmmIndex = getRandom(indexes);
String famPath = clanPath + "/Families/Fam" + family + ".sto";
String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm";
- String command = "H:/Documents/hmmalign -o " + commandExportLocation
- + "/Fam" + family + ".sto ";
+ String command = "/media/sf_Shared_Folder/hmmer/binaries/hmmalign --mapali "
+ + clanPath + "/Families/Fam" + hmmIndex + ".sto"
+ + " --trim ";
command += hmmPath + " ";
command += famPath;
-
+ final int familyIndex = family;
final Process p = Runtime.getRuntime().exec(command);
new Thread(new Runnable()
try
{
+ PrintWriter writer = new PrintWriter(commandExportLocation
+ + "/Families/Fam" + familyIndex + ".sto");
+ String lastLine = "";
+ boolean dataFound = false;
while ((line = input.readLine()) != null)
{
- System.out.println(line);
+ if (line.contains("#=GR") && !dataFound)
+ {
+ writer.println(lastLine);
+ dataFound = true;
+ }
+ if (line.contains("#") || dataFound || " ".equals(line)
+ || "".equals(line) || "//".equals(line))
+ {
+ writer.println(line);
+ }
+ lastLine = line;
}
+ writer.close();
} catch (IOException e)
{
e.printStackTrace();
p.waitFor();
- exportHMM(hmmIndex,
- commandExportLocation + "/HMMs/HMM" + family + ".hmm");
+ BufferedReader hmmExporter = new BufferedReader(
+ new FileReader(hmmPath));
+
+ exportFile(hmmExporter,
+ commandExportLocation + "/HMMs/HMM" + family + ".hmm",
+ false);
alignmentsExported++;
- System.out.println(alignmentsExported + " alignments exported");
}
PrintWriter writer = new PrintWriter(
public int getRandom(ArrayList<Integer> list)
{
+ if (!(list.size() > 0))
+ {
+ System.out.println("Error - size = " + list.size());
+ }
int index = generator.nextInt(list.size());
int value = list.get(index);
list.remove(index);