/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis.scoremodels;

import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.ViewBasedAnalysisI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SeqCigar;
import jalview.datamodel.SequenceFeature;
import jalview.util.SetUtils;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A score model that measures the distance between two sequences by the
 * feature types that are present on one of them but not the other, column by
 * column.
 * <p>
 * {@link #configureFromAlignmentView(jalview.api.AlignmentViewPanel)} must be
 * called before {@link #findDistances(AlignmentView)}, because it supplies
 * the feature renderer that the distance calculation reads from.
 */
public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI
{
  /**
   * Feature renderer obtained from the view this model was configured with;
   * remains null until configureFromAlignmentView has been called.
   */
  jalview.api.FeatureRenderer fr;

  /**
   * Takes a clone of the given view's feature renderer, to be used as the
   * source of displayed feature types and per-residue features.
   *
   * @param view
   *          the alignment view panel to take the feature renderer from
   * @return always true
   */
  @Override
  public boolean configureFromAlignmentView(
          jalview.api.AlignmentViewPanel view)
  {
    fr = view.cloneFeatureRenderer();
    return true;
  }

  /**
   * Calculates a distance measure [i][j] between each pair of sequences as the
   * average number of features they have but do not share. That is, find the
   * features each sequence pair has at each column, ignore feature types they
   * have in common, and count the rest. The totals are normalised by the number
   * of columns processed.
   * <p>
   * A matrix of zeros is returned if no feature types are displayed, or if
   * there are no visible columns to scan (which would otherwise produce NaN
   * scores from a division by zero).
   *
   * @param seqData
   *          supplies the sequences and the visible column ranges
   * @return a symmetric square matrix, one row and column per sequence, with
   *         zeros on the diagonal
   */
  @Override
  public float[][] findDistances(AlignmentView seqData)
  {
    List<String> dft = fr.getDisplayedFeatureTypes();
    SeqCigar[] seqs = seqData.getSequences();
    int noseqs = seqs.length;
    int cpwidth = 0; // number of columns actually scanned
    float[][] distance = new float[noseqs][noseqs];
    if (dft.isEmpty())
    {
      return distance;
    }

    // need to get real position for view position;
    // read below as consecutive [start, end] pairs, end inclusive
    int[] viscont = seqData.getVisibleContigs();

    /*
     * scan each column, compute and add to each distance[i, j]
     * the number of feature types that seqi and seqj do not share
     */
    for (int vc = 0; vc < viscont.length; vc += 2)
    {
      for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
      {
        cpwidth++;

        /*
         * first pass: record features types in column for each sequence
         */
        Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
                cpos);

        /*
         * count feature types on either i'th or j'th sequence but not both
         * and add this 'distance' measure to the total for [i, j] for j > i
         */
        for (int i = 0; i < (noseqs - 1); i++)
        {
          Set<String> typesI = sfap.get(seqs[i]);
          for (int j = i + 1; j < noseqs; j++)
          {
            int seqDistance = SetUtils.countDisjunction(typesI,
                    sfap.get(seqs[j]));
            distance[i][j] += seqDistance;
          }
        }
      }
    }

    /*
     * no columns were scanned, so every total is still zero; return now
     * rather than dividing 0 by 0 and filling the matrix with NaN
     */
    if (cpwidth == 0)
    {
      return distance;
    }

    /*
     * normalise the distance scores (summed over columns) by the
     * number of visible columns used in the calculation, and mirror
     * the upper triangle into the lower one
     */
    for (int i = 0; i < noseqs; i++)
    {
      for (int j = i + 1; j < noseqs; j++)
      {
        distance[i][j] /= cpwidth;
        distance[j][i] = distance[i][j];
      }
    }
    return distance;
  }

  /**
   * Builds and returns a map, with one entry per SeqCigar, of the set of
   * feature types found at the given column position. A sequence for which
   * findPosition returns -1 for the column (presumably no residue there -
   * confirm against SeqCigar) is mapped to an empty set.
   *
   * @param seqs
   *          the sequences to inspect
   * @param columnPosition
   *          the alignment column to look up in each sequence
   * @return a map from each sequence to its (possibly empty) set of feature
   *         type names at that column
   */
  protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
          SeqCigar[] seqs, int columnPosition)
  {
    Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
    for (SeqCigar seq : seqs)
    {
      Set<String> types = new HashSet<String>();
      int spos = seq.findPosition(columnPosition);
      if (spos != -1)
      {
        List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
                spos);
        for (SequenceFeature sf : sfs)
        {
          types.add(sf.getType());
        }
      }
      sfap.put(seq, types);
    }
    return sfap;
  }

  /**
   * @return the display name of this score model
   */
  @Override
  public String getName()
  {
    return "Sequence Feature Similarity";
  }

  /**
   * @return always true
   */
  @Override
  public boolean isDNA()
  {
    return true;
  }

  /**
   * @return always true
   */
  @Override
  public boolean isProtein()
  {
    return true;
  }

  /**
   * @return a one-line description of how this model scores sequences
   */
  @Override
  public String toString()
  {
    return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
  }
}