X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2Fscoremodels%2FSecondaryStructureDistanceModel.java;h=3a719d8c5e6ca0ba952233fa68a58b18d0c65b86;hb=98a277d5e5bd7a034b2acbc4d28544210ada392e;hp=0aac7fac83a739ca4146d7f997c30b39bf1f1fd1;hpb=f1b00517fe5f888f3213d70270e27a83dffe1695;p=jalview.git diff --git a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java index 0aac7fa..3a719d8 100644 --- a/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SecondaryStructureDistanceModel.java @@ -32,7 +32,9 @@ import jalview.math.Matrix; import jalview.math.MatrixI; import jalview.util.SetUtils; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; /* This class contains methods to calculate distance score between @@ -42,9 +44,21 @@ import java.util.Set; public class SecondaryStructureDistanceModel extends DistanceScoreModel { private static final String NAME = "Secondary Structure Similarity"; + + private static final String SS_ANNOTATION_LABEL = "Secondary Structure"; + + private static final String SS_ANNOTATION_FROM_JPRED_LABEL = "jnetpred"; private String description; + //maximum distance score is defined as 2 as the possible number of unique ss is 2. + private static final int MAX_SCORE = 2; + + //minimum distance score is defined as 2 as the possible number of unique ss is 2. + private static final int MIN_SCORE = 0; + + private static final char COIL = 'C'; + FeatureRenderer fr; /** @@ -117,7 +131,8 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public MatrixI findDistances(AlignmentView seqData, SimilarityParamsI params) - { + { + SeqCigar[] seqs = seqData.getSequences(); int noseqs = seqs.length; //no of sequences int cpwidth = 0; // = seqData.getWidth(); @@ -125,6 +140,22 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel // need to get real position for view position int[] viscont = seqData.getVisibleContigs(); + Map> calcIdMapInAlignmentAnnotation = new HashMap>(); + + AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment().getAlignmentAnnotation(); + if(alignAnnotList.length > 0) { + + for (AlignmentAnnotation aa: alignAnnotList) { + if (SS_ANNOTATION_LABEL.equals(aa.label) || SS_ANNOTATION_FROM_JPRED_LABEL.equals(aa.label)) { + calcIdMapInAlignmentAnnotation.computeIfAbsent(aa.getCalcId(), k -> new HashSet<>()).add(aa.description); + } + + } + } + + + Set seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs, calcIdMapInAlignmentAnnotation); + /* * scan each column, compute and add to each distance[i, j] @@ -155,8 +186,27 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel { SeqCigar sc1 = seqs[i]; SeqCigar sc2 = seqs[j]; + + + //check if ss is defined + boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1); + boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2); + + // Set distance to 0 if both SS are not defined + if (undefinedSS1 && undefinedSS2) { + distances[i][j] += MIN_SCORE; + continue; + } + + // Set distance to maximum score if either one SS is not defined + else if(undefinedSS1 || undefinedSS2) { + distances[i][j] += MAX_SCORE; + continue; + } + + //check if the sequence contains gap in the current column boolean gap1 = !seqsWithoutGapAtCol.contains(sc1); - boolean gap2 = !seqsWithoutGapAtCol.contains(sc2); + boolean gap2 = !seqsWithoutGapAtCol.contains(sc2); //Variable to store secondary structure at the current column Set secondaryStructure1 = new HashSet(); @@ -164,20 +214,20 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel //secondary structure is fetched only if the current column is not //gap for the sequence - if(!gap1) { + if(!gap1 && !undefinedSS1) { secondaryStructure1.addAll( findSSAnnotationForGivenSeqAndCol(seqs[i], cpos)); } - if(!gap2) { + if(!gap2 && !undefinedSS2) { secondaryStructure2.addAll( findSSAnnotationForGivenSeqAndCol(seqs[j], cpos)); - } + } /* * gap-gap always scores zero - * residue-residue is always scored - * include gap-residue score if params say to do so + * ss-ss is always scored + * include gap-ss scores 1 if params say to do so */ if ((!gap1 && !gap2) || params.includeGaps()) { @@ -235,6 +285,47 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel } /** + * Builds and returns a set containing sequences (SeqCigar) which have + * no secondary structures defined + * + * @param seqs + * (0..) + * @return + */ + private static final String[] SS_ANNOTATION_LABELS = { + SS_ANNOTATION_LABEL, + SS_ANNOTATION_FROM_JPRED_LABEL + }; + + protected Set findSeqsWithUndefinedSS(SeqCigar[] seqs, Map> calcIdMapInAlignmentAnnotation) { + Set seqsWithUndefinedSS = new HashSet<>(); + for (SeqCigar seq : seqs) { + if (isSSUndefinedOrNotAdded(seq, calcIdMapInAlignmentAnnotation)) { + seqsWithUndefinedSS.add(seq); + } + } + return seqsWithUndefinedSS; + } + + private boolean isSSUndefinedOrNotAdded(SeqCigar seq, Map> calcIdMapInAlignmentAnnotation) { + for (String label : SS_ANNOTATION_LABELS) { + AlignmentAnnotation[] annotations = seq.getRefSeq().getAnnotation(label); + if (annotations != null) { + for (AlignmentAnnotation annotation : annotations) { + HashSet descriptionList = calcIdMapInAlignmentAnnotation.get(annotation.getCalcId()); + if (descriptionList.contains(annotation.description)) { + // Secondary structure annotation is present and added to the track, no need to add seq + return false; + } + } + } + } + // Either annotations are undefined or not added to the track + return true; + } + + + /** * Finds secondary structure annotation for a given sequence (SeqCigar) * and column position corresponding to the sequence. * @@ -248,27 +339,34 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel { Set secondaryStructure = new HashSet(); - char ss = '\0'; //default null character + char ss; //fetch the position in sequence for the column and finds the //corresponding secondary structure annotation + //TO DO - consider based on priority int seqPosition = seq.findPosition(columnPosition); - AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation("Secondary Structure"); + AlignmentAnnotation[] aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_LABEL); + + if(aa == null) { + aa = seq.getRefSeq().getAnnotation(SS_ANNOTATION_FROM_JPRED_LABEL); + } + if (aa != null) { if (aa[0].getAnnotationForPosition(seqPosition) != null) { Annotation a = aa[0].getAnnotationForPosition(seqPosition); ss = a.secondaryStructure; - if (ss == ' ') { - ss = 'C'; // In JalView, 'C' is represented as ' ' + + //There is no representation for coil and it can be either ' ' or null. + if (ss == ' ' || ss == '-') { + ss = COIL; } } else { - ss = 'C'; - } - if (ss != '\0') { // Check if ss is not the default null character - secondaryStructure.add(String.valueOf(ss)); + ss = COIL; } + secondaryStructure.add(String.valueOf(ss)); } + return secondaryStructure; } @@ -288,7 +386,7 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public boolean isDNA() { - return false; + return false; } @Override @@ -306,6 +404,6 @@ public class SecondaryStructureDistanceModel extends DistanceScoreModel @Override public String toString() { - return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column"; + return "Score between sequences based on hamming distance between binary vectors marking secondary structure displayed at each column"; } } \ No newline at end of file