From: Jim Procter Date: Thu, 23 May 2024 17:14:50 +0000 (+0100) Subject: JAL-4422 convert from String to char[] before computing PID for two strings derived... X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=f59e4dd5ddf41fd74bd463799dcae7d7ff2b2d52;p=jalview.git JAL-4422 convert from String to char[] before computing PID for two strings derived from sequences and report progress on stdout for every 10th of the similarity matrix --- diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 22cffb1..77b3c74 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -24,6 +24,7 @@ import jalview.analysis.scoremodels.PIDModel; import jalview.analysis.scoremodels.ScoreMatrix; import jalview.analysis.scoremodels.ScoreModels; import jalview.analysis.scoremodels.SimilarityParams; +import jalview.bin.Console; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Mapping; @@ -1261,18 +1262,23 @@ public class AlignSeq redundancy[i] = 0f; lngth[i] = -1; } - - // long start = System.currentTimeMillis(); + + long start_time = System.currentTimeMillis(); SimilarityParams pidParams = new SimilarityParams(true, true, true, true); float pid; - String seqi, seqj; + char[] seqi, seqj; + long pc=0; for (int i = 0; i < height; i++) { - for (int j = 0; j < i; j++) { + if (j*j > pc) + { + pc += height*height/10; + Console.outPrintln(""+(System.currentTimeMillis()-start_time)/1000f+"On the "+j+"'th sequence."); + } if (i == j) { continue; @@ -1280,33 +1286,39 @@ public class AlignSeq if (omitHidden == null) { - seqi = originalSequences[i].getSequenceAsString(start, end); - seqj = originalSequences[j].getSequenceAsString(start, end); + seqi = originalSequences[i].getSequence(start, end); + seqj = originalSequences[j].getSequence(start, end); + if (lngth[i]==-1) { + lngth[i] = 1-originalSequences[i].findPosition(start)+originalSequences[i].findPosition(end); + } + if (lngth[j]==-1) { + lngth[j] = 1-originalSequences[j].findPosition(start)+originalSequences[j].findPosition(end); + } } else { - seqi = omitHidden[i]; - seqj = omitHidden[j]; - } - if (lngth[i] == -1) - { - String ug = AlignSeq.extractGaps(Comparison.GapChars, seqi); - lngth[i] = ug.length(); - if (ungapped) + seqi = omitHidden[i].toCharArray(); + seqj = omitHidden[j].toCharArray(); + if (lngth[i] == -1) { - seqi = ug; + String ug = AlignSeq.extractGaps(Comparison.GapChars, omitHidden[i]); + lngth[i] = ug.length(); + if (ungapped) + { + seqi = ug.toCharArray(); + } } - } - if (lngth[j] == -1) - { - String ug = AlignSeq.extractGaps(Comparison.GapChars, seqj); - lngth[j] = ug.length(); - if (ungapped) + if (lngth[j] == -1) { - seqj = ug; + String ug = AlignSeq.extractGaps(Comparison.GapChars, omitHidden[j]); + lngth[j] = ug.length(); + if (ungapped) + { + seqj = ug.toCharArray(); + } } } - pid = (float) PIDModel.computePID(seqi, seqj, pidParams); + pid = (float) PIDModel.computePIDChar(seqi, seqj, pidParams); // use real sequence length rather than string length if (lngth[j] < lngth[i]) diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java index ddfe5e4..e8b9b0b 100644 --- a/src/jalview/analysis/scoremodels/PIDModel.java +++ b/src/jalview/analysis/scoremodels/PIDModel.java @@ -180,8 +180,22 @@ public class PIDModel extends SimilarityScoreModel public static double computePID(String seq1, String seq2, SimilarityParamsI options) { - int len1 = seq1.length(); - int len2 = seq2.length(); + return computePIDChar(seq1.toCharArray(),seq2.toCharArray(),options); + } + /** + * Computes a percentage identity for two sequences, using the algorithm + * choices specified by the options parameter + * + * @param seq1 + * @param seq2 + * @param options + * @return + */ + public static double computePIDChar(char[] seq1, char[] seq2, + final SimilarityParamsI options) + { + int len1 = seq1.length; + int len2 = seq2.length; int width = Math.max(len1, len2); int total = 0; int divideBy = 0; @@ -208,10 +222,8 @@ public class PIDModel extends SimilarityScoreModel } continue; } - char c1 = seq1.charAt(i); - char c2 = seq2.charAt(i); - boolean gap1 = Comparison.isGap(c1); - boolean gap2 = Comparison.isGap(c2); + boolean gap1 = Comparison.isGap(seq1[i]); + boolean gap2 = Comparison.isGap(seq2[i]); if (gap1 && gap2) { @@ -247,7 +259,7 @@ public class PIDModel extends SimilarityScoreModel /* * remaining case is gap-residue */ - if (toUpper(c1) == toUpper(c2)) + if (toUpper(seq1[i]) == toUpper(seq2[i])) { total++; }