*/
package jalview.analysis;
+import jalview.analysis.scoremodels.PIDModel;
import jalview.analysis.scoremodels.ScoreMatrix;
import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Mapping;
import java.awt.Color;
import java.awt.Graphics;
+import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
*/
public class AlignSeq
{
+ private static final int MAX_NAME_LENGTH = 30;
+
+ private static final int GAP_OPEN_COST = 120;
+
+ private static final int GAP_EXTEND_COST = 20;
+
+ private static final int GAP_INDEX = -1;
+
public static final String PEP = "pep";
public static final String DNA = "dna";
float[][] F;
- int[][] traceback;
+ int[][] traceback; // todo is this actually used?
int[] seq1;
/** Start of the aligned region in the second sequence, counted from 1 (traceback index + 1) */
public int seq2start;
- /** DOCUMENT ME!! */
public int seq2end;
int count;
- /** DOCUMENT ME!! */
public float maxscore;
- float pid;
-
int prev = 0;
- int gapOpen = 120;
-
- int gapExtend = 20;
-
- float[][] lookup;
-
- int gapIndex = 23;
-
StringBuffer output = new StringBuffer();
String type; // AlignSeq.PEP or AlignSeq.DNA
- private ScoreMatrix scoreModel;
+ private ScoreMatrix scoreMatrix;
/**
* Creates a new AlignSeq object.
*
- * @param s1 first sequence for alignment
- * @param s2 second sequence for alignment
- * @param type molecule type, either AlignSeq.PEP or AlignSeq.DNA
+ * @param s1
+ * first sequence for alignment
+ * @param s2
+ * second sequence for alignment
+ * @param type
+ * molecule type, either AlignSeq.PEP or AlignSeq.DNA
*/
public AlignSeq(SequenceI s1, SequenceI s2, String type)
{
SequenceI alSeq1 = new Sequence(s1.getName(), getAStr1());
alSeq1.setStart(s1.getStart() + getSeq1Start() - 1);
alSeq1.setEnd(s1.getStart() + getSeq1End() - 1);
- alSeq1.setDatasetSequence(s1.getDatasetSequence() == null ? s1 : s1
- .getDatasetSequence());
+ alSeq1.setDatasetSequence(
+ s1.getDatasetSequence() == null ? s1 : s1.getDatasetSequence());
return alSeq1;
}
SequenceI alSeq2 = new Sequence(s2.getName(), getAStr2());
alSeq2.setStart(s2.getStart() + getSeq2Start() - 1);
alSeq2.setEnd(s2.getStart() + getSeq2End() - 1);
- alSeq2.setDatasetSequence(s2.getDatasetSequence() == null ? s2 : s2
- .getDatasetSequence());
+ alSeq2.setDatasetSequence(
+ s2.getDatasetSequence() == null ? s2 : s2.getDatasetSequence());
return alSeq2;
}
if (s1str.length() == 0 || s2str.length() == 0)
{
- output.append("ALL GAPS: "
- + (s1str.length() == 0 ? s1.getName() : " ")
- + (s2str.length() == 0 ? s2.getName() : ""));
+ output.append(
+ "ALL GAPS: " + (s1str.length() == 0 ? s1.getName() : " ")
+ + (s2str.length() == 0 ? s2.getName() : ""));
return;
}
- seq1 = new int[s1str.length()];
-
- seq2 = new int[s2str.length()];
-
score = new float[s1str.length()][s2str.length()];
E = new float[s1str.length()][s2str.length()];
if (!PEP.equals(moleculeType) && !DNA.equals(moleculeType))
{
output.append("Wrong type = dna or pep only");
- throw new Error(MessageManager.formatMessage(
- "error.unknown_type_dna_or_pep",
- new String[] { moleculeType }));
+ throw new Error(MessageManager
+ .formatMessage("error.unknown_type_dna_or_pep", new String[]
+ { moleculeType }));
}
type = moleculeType;
- scoreModel = ScoreModels.getInstance().getDefaultModel(
- PEP.equals(type));
- lookup = scoreModel.getMatrix();
- gapIndex = scoreModel.getMatrixIndex(' ');
+ scoreMatrix = ScoreModels.getInstance()
+ .getDefaultModel(PEP.equals(type));
}
/**
}
}
- // System.out.println(maxi + " " + maxj + " " + score[maxi][maxj]);
int i = maxi;
int j = maxj;
int trace;
- maxscore = score[i][j] / 10;
+ maxscore = score[i][j] / 10f;
seq1end = maxi + 1;
seq2end = maxj + 1;
else if (trace == 1)
{
j--;
- aseq1[count] = gapIndex;
+ aseq1[count] = GAP_INDEX;
sb1.replace(sb1.length() - 1, sb1.length(), "-");
}
else if (trace == -1)
{
i--;
- aseq2[count] = gapIndex;
+ aseq2[count] = GAP_INDEX;
sb2.replace(sb2.length() - 1, sb2.length(), "-");
}
seq1start = i + 1;
seq2start = j + 1;
- if (aseq1[count] != gapIndex)
+ if (aseq1[count] != GAP_INDEX)
{
aseq1[count] = seq1[i];
sb1.append(s1str.charAt(i));
}
- if (aseq2[count] != gapIndex)
+ if (aseq2[count] != GAP_INDEX)
{
aseq2[count] = seq2[j];
sb2.append(s2str.charAt(j));
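/**
* Appends a text report of the alignment to the output buffer (score,
* alignment length, the two sequence ids and lengths, the aligned sequences
* in chunks with match symbols between them, and the percentage identity),
* then prints the buffer to the given stream.
*
* @param os
* stream the report is printed to
*/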
- public void printAlignment(java.io.PrintStream os)
+ public void printAlignment(PrintStream os)
{
// TODO: Use original sequence characters rather than re-translated
// characters in output
// Find the biggest id length for formatting purposes
- String s1id = s1.getName(), s2id = s2.getName();
- int maxid = s1.getName().length();
- if (s2.getName().length() > maxid)
- {
- maxid = s2.getName().length();
- }
- if (maxid > 30)
+ String s1id = getAlignedSeq1().getDisplayId(true);
+ String s2id = getAlignedSeq2().getDisplayId(true);
+ int nameLength = Math.max(s1id.length(), s2id.length());
+ if (nameLength > MAX_NAME_LENGTH)
{
- maxid = 30;
+ int truncateBy = nameLength - MAX_NAME_LENGTH;
+ nameLength = MAX_NAME_LENGTH;
// JAL-527 - truncate the sequence ids
- if (s1.getName().length() > maxid)
+ if (s1id.length() > nameLength)
{
- s1id = s1.getName().substring(0, 30);
+ int slashPos = s1id.lastIndexOf('/');
+ s1id = s1id.substring(0, slashPos - truncateBy)
+ + s1id.substring(slashPos);
}
- if (s2.getName().length() > maxid)
+ if (s2id.length() > nameLength)
{
- s2id = s2.getName().substring(0, 30);
+ int slashPos = s2id.lastIndexOf('/');
+ s2id = s2id.substring(0, slashPos - truncateBy)
+ + s2id.substring(slashPos);
}
}
- int len = 72 - maxid - 1;
+ int len = 72 - nameLength - 1;
int nochunks = ((aseq1.length - count) / len)
+ ((aseq1.length - count) % len > 0 ? 1 : 0);
- pid = 0;
+ float pid = 0f;
output.append("Score = ").append(score[maxi][maxj]).append(NEWLINE);
output.append("Length of alignment = ")
.append(String.valueOf(aseq1.length - count)).append(NEWLINE);
output.append("Sequence ");
- output.append(new Format("%" + maxid + "s").form(s1.getName()));
- output.append(" : ").append(String.valueOf(s1.getStart()))
- .append(" - ").append(String.valueOf(s1.getEnd()));
+ Format nameFormat = new Format("%" + nameLength + "s");
+ output.append(nameFormat.form(s1id));
output.append(" (Sequence length = ")
.append(String.valueOf(s1str.length())).append(")")
.append(NEWLINE);
output.append("Sequence ");
- output.append(new Format("%" + maxid + "s").form(s2.getName()));
- output.append(" : ").append(String.valueOf(s2.getStart()))
- .append(" - ").append(String.valueOf(s2.getEnd()));
+ output.append(nameFormat.form(s2id));
output.append(" (Sequence length = ")
.append(String.valueOf(s2str.length())).append(")")
.append(NEWLINE).append(NEWLINE);
for (int j = 0; j < nochunks; j++)
{
// Print the first aligned sequence
- output.append(new Format("%" + (maxid) + "s").form(s1id)).append(" ");
+ output.append(nameFormat.form(s1id)).append(" ");
for (int i = 0; i < len; i++)
{
}
output.append(NEWLINE);
- output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
+ output.append(nameFormat.form(" ")).append(" ");
/*
* Print out the match symbols:
pid++;
output.append("|");
}
- else if (type.equals("pep"))
+ else if (PEP.equals(type))
{
if (pam250.getPairwiseScore(c1, c2) > 0)
{
// Now print the second aligned sequence
output = output.append(NEWLINE);
- output = output.append(new Format("%" + (maxid) + "s").form(s2id))
- .append(" ");
+ output = output.append(nameFormat.form(s2id)).append(" ");
for (int i = 0; i < len; i++)
{
}
pid = pid / (aseq1.length - count) * 100;
- output = output.append(new Format("Percentage ID = %2.2f\n").form(pid));
+ output.append(new Format("Percentage ID = %3.2f\n").form(pid));
+ output.append(NEWLINE);
try
{
os.print(output.toString());
public int findTrace(int i, int j)
{
int t = 0;
- float max = score[i - 1][j - 1] + (lookup[seq1[i]][seq2[j]] * 10);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(j));
+ float max = score[i - 1][j - 1] + (pairwiseScore * 10);
if (F[i][j] > max)
{
int m = seq2.length;
// top left hand element
- score[0][0] = lookup[seq1[0]][seq2[0]] * 10;
- E[0][0] = -gapExtend;
+ score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0),
+ s2str.charAt(0)) * 10;
+ E[0][0] = -GAP_EXTEND_COST;
F[0][0] = 0;
// Calculate the top row first
for (int j = 1; j < m; j++)
{
// What should these values be? 0 maybe
- E[0][j] = max(score[0][j - 1] - gapOpen, E[0][j - 1] - gapExtend);
- F[0][j] = -gapExtend;
+ E[0][j] = max(score[0][j - 1] - GAP_OPEN_COST, E[0][j - 1] - GAP_EXTEND_COST);
+ F[0][j] = -GAP_EXTEND_COST;
- score[0][j] = max(lookup[seq1[0]][seq2[j]] * 10, -gapOpen, -gapExtend);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0),
+ s2str.charAt(j));
+ score[0][j] = max(pairwiseScore * 10, -GAP_OPEN_COST, -GAP_EXTEND_COST);
traceback[0][j] = 1;
}
// Now do the left hand column
for (int i = 1; i < n; i++)
{
- E[i][0] = -gapOpen;
- F[i][0] = max(score[i - 1][0] - gapOpen, F[i - 1][0] - gapExtend);
+ E[i][0] = -GAP_OPEN_COST;
+ F[i][0] = max(score[i - 1][0] - GAP_OPEN_COST, F[i - 1][0] - GAP_EXTEND_COST);
- score[i][0] = max(lookup[seq1[i]][seq2[0]] * 10, E[i][0], F[i][0]);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(0));
+ score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
traceback[i][0] = -1;
}
{
for (int j = 1; j < m; j++)
{
- E[i][j] = max(score[i][j - 1] - gapOpen, E[i][j - 1] - gapExtend);
- F[i][j] = max(score[i - 1][j] - gapOpen, F[i - 1][j] - gapExtend);
+ E[i][j] = max(score[i][j - 1] - GAP_OPEN_COST, E[i][j - 1] - GAP_EXTEND_COST);
+ F[i][j] = max(score[i - 1][j] - GAP_OPEN_COST, F[i - 1][j] - GAP_EXTEND_COST);
- score[i][j] = max(score[i - 1][j - 1]
- + (lookup[seq1[i]][seq2[j]] * 10), E[i][j], F[i][j]);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(j));
+ score[i][j] = max(score[i - 1][j - 1] + (pairwiseScore * 10),
+ E[i][j], F[i][j]);
traceback[i][j] = findTrace(i, j);
}
}
for (int i = 0; i < s.length(); i++)
{
char c = s.charAt(i);
- encoded[i] = scoreModel.getMatrixIndex(c);
+ encoded[i] = scoreMatrix.getMatrixIndex(c);
}
return encoded;
public static void displayMatrix(Graphics g, int[][] mat, int n, int m,
int psize)
{
- // TODO method dosen't seem to be referenced anywhere delete??
+ // TODO method doesn't seem to be referenced anywhere delete??
int max = -1000;
int min = 1000;
*/
public jalview.datamodel.Mapping getMappingFromS1(boolean allowmismatch)
{
- ArrayList<Integer> as1 = new ArrayList<Integer>(), as2 = new ArrayList<Integer>();
+ ArrayList<Integer> as1 = new ArrayList<Integer>(),
+ as2 = new ArrayList<Integer>();
int pdbpos = s2.getStart() + getSeq2Start() - 2;
int alignpos = s1.getStart() + getSeq1Start() - 2;
int lp2 = pdbpos - 3, lp1 = alignpos - 3;
}
// construct range pairs
- int[] mapseq1 = new int[as1.size() + (lastmatch ? 1 : 0)], mapseq2 = new int[as2
- .size() + (lastmatch ? 1 : 0)];
+ int[] mapseq1 = new int[as1.size() + (lastmatch ? 1 : 0)],
+ mapseq2 = new int[as2.size() + (lastmatch ? 1 : 0)];
int i = 0;
for (Integer ip : as1)
{
List<SequenceI> ochains, AlignmentI al, String dnaOrProtein,
boolean removeOldAnnots)
{
- List<SequenceI> orig = new ArrayList<SequenceI>(), repl = new ArrayList<SequenceI>();
+ List<SequenceI> orig = new ArrayList<SequenceI>(),
+ repl = new ArrayList<SequenceI>();
List<AlignSeq> aligs = new ArrayList<AlignSeq>();
if (al != null && al.getHeight() > 0)
{
bestm = msq;
}
}
- System.out.println("Best Score for " + (matches.size() + 1) + " :"
- + bestscore);
+ // System.out.println("Best Score for " + (matches.size() + 1) + " :"
+ // + bestscore);
matches.add(bestm);
aligns.add(bestaseq);
al.deleteSequence(bestm);
// long start = System.currentTimeMillis();
+ SimilarityParams pidParams = new SimilarityParams(true, true, true,
+ true);
float pid;
String seqi, seqj;
for (int i = 0; i < height; i++)
seqj = ug;
}
}
- pid = Comparison.PID(seqi, seqj);
+ pid = (float) PIDModel.computePID(seqi, seqj, pidParams);
// use real sequence length rather than string length
if (lngth[j] < lngth[i])