X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FHMMProbabilityDistributionAnalyser.java;h=66ae5526e7c5cce7a2e49eefc6a8e00872f77f2b;hb=456e5c765ac1e85336fd9f9b1a35453069bb2298;hp=7d6687817a008aba47aa9f5ed3c2520e9c5e0de8;hpb=539028fc857235cff10084f0c8e18be4294a2f85;p=jalview.git diff --git a/src/jalview/util/HMMProbabilityDistributionAnalyser.java b/src/jalview/util/HMMProbabilityDistributionAnalyser.java index 7d66878..66ae552 100644 --- a/src/jalview/util/HMMProbabilityDistributionAnalyser.java +++ b/src/jalview/util/HMMProbabilityDistributionAnalyser.java @@ -48,13 +48,13 @@ public class HMMProbabilityDistributionAnalyser Map binned = new HashMap<>(); // location of the family file - String families = "H:/Desktop//PFAM/Family/SeedFamilies.seed"; + String families = "/media/sf_Shared_Folder/PFAM/Family/SeedFamilies.seed"; // location of the file containing the family-clan links - final static String FAMILIESTOCLAN = "H:/Desktop//PFAM/Family/Clanlinks.dat"; + final static String FAMILIESTOCLAN = "/media/sf_Shared_Folder/PFAM/Family/Clanlinks.dat"; // location of the HMM file - String hmms = "H:/Desktop//PFAM/HMMs/Pfam-A.hmm"; + String hmms = "/media/sf_Shared_Folder/PFAM/HMMs/Pfam-A.hmm"; // suffix for raw file final static String RAW = "/Raw.csv"; @@ -185,7 +185,8 @@ public class HMMProbabilityDistributionAnalyser * The number of families to read before saving. * @throws IOException */ - public void runToEnd(boolean keepRawData, boolean forClans) + public void runToEnd(int minCount, int maxCount, boolean keepRawData, + boolean forClans) throws IOException { keepRaw = keepRawData; @@ -193,18 +194,25 @@ public class HMMProbabilityDistributionAnalyser BufferedReader inputHMM = null; int size = 0; int files = 1; + try + { if (forClans) { - files = 604; + files = 603; } int filesRead = 0; for (int clan = 0; clan < files; clan++) { + System.out.println(clan); String clanPath = ""; int numberOfFamilies = 0; if (forClans) { clanPath = currentFolder + "/Clan" + clan; + if (!new File(clanPath).exists()) + { + continue; + } BufferedReader famCountReader = new BufferedReader( new FileReader(clanPath + "/NumberOfFamilies.txt")); numberOfFamilies = Integer.parseInt(famCountReader.readLine()); @@ -234,16 +242,25 @@ public class HMMProbabilityDistributionAnalyser readHMM(inputHMM); int count = countValidResidues(); - processData(count); + if (count >= minCount && count < maxCount) + { + processData(count); + } filesRead++; System.out.println(filesRead); endReached = atEnd(inputSTO); } } } - exportData(currentFolder); - raw.clear(); - binned.clear(); + } catch (Exception e) + { + e.printStackTrace(); + } finally + { + exportData(currentFolder); + raw.clear(); + binned.clear(); + } } /** @@ -356,10 +373,10 @@ public class HMMProbabilityDistributionAnalyser { char symbol; int alignPos; - alignPos = hmm.getNodeAlignmentColumn(length); + alignPos = hmm.getNodeMapPosition(length); symbol = sequences.get(width).getCharAt(alignPos); - if (ResidueProperties.aminoBackgroundFrequencies + if (ResidueProperties.backgroundFrequencies.get("amino") .containsKey(symbol)) { count++; @@ -383,24 +400,29 @@ public class HMMProbabilityDistributionAnalyser raw.add(new ArrayList()); rawPos = raw.size() - 1; } - + Double total = 0d; for (int width = 0; width < sequences.size(); width++) { for (int length = 1; length < hmm.getLength() + 1; length++) { char symbol; int alignPos; - alignPos = hmm.getNodeAlignmentColumn(length); + alignPos = hmm.getNodeMapPosition(length); symbol = sequences.get(width).getCharAt(alignPos); - if (ResidueProperties.aminoBackgroundFrequencies + if (ResidueProperties.backgroundFrequencies.get("amino") .containsKey(symbol)) { Double prob; Float bfreq; Double llr; prob = hmm.getMatchEmissionProbability(alignPos, symbol); - bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); + bfreq = ResidueProperties.backgroundFrequencies.get("amino") + .get(symbol); + if (prob == 0 || bfreq == 0) + { + System.out.println("error"); + } llr = Math.log(prob / bfreq); if (keepRaw) { @@ -409,6 +431,7 @@ public class HMMProbabilityDistributionAnalyser String output; output = String.format("%.1f", llr); + total += Double.parseDouble(output); if ("-0.0".equals(output)) { output = "0.0"; @@ -427,6 +450,7 @@ public class HMMProbabilityDistributionAnalyser } } } + System.out.println(total / count); } @@ -462,13 +486,8 @@ public class HMMProbabilityDistributionAnalyser { FileParse parserHMM = new FileParse(inputHMM, "", DataSourceType.FILE); HMMFile file = new HMMFile(parserHMM); - file.parse(); hmm = file.getHMM(); - if (reference != null) - { - hmm.mapToReferenceAnnotation(reference); - } } @@ -709,6 +728,7 @@ public class HMMProbabilityDistributionAnalyser BufferedReader familyReader = new BufferedReader( new FileReader(families)); BufferedReader hmmReader = new BufferedReader(new FileReader(hmms)); + int families = 0; // moveLocationBy(7000, familyReader); // moveLocationBy(7000, clanFinder); // moveLocationBy(7000, hmmReader); @@ -721,10 +741,6 @@ public class HMMProbabilityDistributionAnalyser while (!"".equals(line) && !" ".equals(line) && line != null) { - if (line.contains("HATP") || line.contains("CL0025")) - { - System.out.println(filePos); - } String clanName; boolean inClan = false; while (!(line.indexOf("//") > -1)) @@ -732,6 +748,8 @@ public class HMMProbabilityDistributionAnalyser if (line.indexOf("#=GF CL") > -1) { + families++; + System.out.println(families); inClan = true; Scanner scanner = new Scanner(line); scanner.next(); @@ -816,7 +834,7 @@ public class HMMProbabilityDistributionAnalyser int alignmentsExported = 0; for (int clan = 0; clan < 604; clan++) { - + System.out.println(clan); int famCount = 0; String clanPath = clansLocation + "/Clan" + clan; int numberOfFamilies; @@ -846,7 +864,7 @@ public class HMMProbabilityDistributionAnalyser int hmmIndex = getRandom(indexes); String famPath = clanPath + "/Families/Fam" + family + ".sto"; String hmmPath = clanPath + "/HMMs/HMM" + hmmIndex + ".hmm"; - String command = "H:/Desktop//hmmer/binaries/hmmalign --mapali " + String command = "/media/sf_Shared_Folder/hmmer/binaries/hmmalign --mapali " + clanPath + "/Families/Fam" + hmmIndex + ".sto" + " --trim "; command += hmmPath + " "; @@ -902,8 +920,6 @@ public class HMMProbabilityDistributionAnalyser alignmentsExported++; - System.out.println(alignmentsExported + " alignments exported"); - System.out.println("At clan " + clan); } PrintWriter writer = new PrintWriter(