4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
15 #include "SubMatrix.h"
17 #include "../general/InvalidCombination.cpp"
18 #include "../general/debuglogObject.h"
28 SubMatrix::SubMatrix()
32 QTDNAHistMatNum(DNAIUB),
33 QTAAHistMatNum(AAHISTGONNETPAM250),
34 QTsegmentDNAMatNum(DNAIUB),
35 QTsegmentAAMatNum(QTAASEGGONNETPAM250)
39 setUpCrossReferences();
42 if(logObject && DEBUGLOG)
44 logObject->logMsg("Creating the SubMatrix object\n");
50 /* Set up the vectors with the matrices defined in matrices.h
51 * The matrices are intially defined as a short array, as there is no way
52 * to intiailize a vector with a {....} list.
54 blosum30mtVec = new Matrix(blosum30mt, blosum30mt + sizenAAMatrix);
55 blosum40mtVec = new Matrix(blosum40mt, blosum40mt + sizenAAMatrix);
56 blosum45mtVec = new Matrix(blosum45mt, blosum45mt + sizenAAMatrix);
57 blosum62mt2Vec = new Matrix(blosum62mt2, blosum62mt2 + sizenAAMatrix);
58 blosum80mtVec = new Matrix(blosum80mt, blosum80mt + sizenAAMatrix);
59 pam20mtVec = new Matrix(pam20mt, pam20mt + sizenAAMatrix);
60 pam60mtVec = new Matrix(pam60mt, pam60mt + sizenAAMatrix);
61 pam120mtVec = new Matrix(pam120mt, pam120mt + sizenAAMatrix);
62 pam350mtVec = new Matrix(pam350mt, pam350mt + sizenAAMatrix);
63 idmatVec = new Matrix(idmat, idmat + sizenAAMatrix);
64 gon40mtVec = new Matrix(gon40mt, gon40mt + sizenAAMatrix);
65 gon80mtVec = new Matrix(gon80mt, gon80mt + sizenAAMatrix);
66 gon120mtVec = new Matrix(gon120mt, gon120mt + sizenAAMatrix);
67 gon160mtVec = new Matrix(gon160mt, gon160mt + sizenAAMatrix);
68 gon250mtVec = new Matrix(gon250mt, gon250mt + sizenAAMatrix);
69 gon350mtVec = new Matrix(gon350mt, gon350mt + sizenAAMatrix);
70 clustalvdnamtVec = new Matrix(clustalvdnamt, clustalvdnamt + sizeDNAMatrix);
71 swgapdnamtVec = new Matrix(swgapdnamt, swgapdnamt + sizeDNAMatrix);
74 * Set up the vectors for user defined types.
75 * Probably dont need to do this, as they may not be used. It would be better
76 * to initialise their size if thye are used.
78 userMat.resize(NUMRES * NUMRES);
79 pwUserMat.resize(NUMRES * NUMRES);
80 userDNAMat.resize(NUMRES * NUMRES);
81 pwUserDNAMat.resize(NUMRES * NUMRES);
82 QTscoreUserMatrix.resize(NUMRES * NUMRES);
83 QTscoreUserDNAMatrix.resize(NUMRES * NUMRES);
84 QTsegmentDNAMatrix.resize(NUMRES * NUMRES);
85 QTsegmentAAMatrix.resize(NUMRES * NUMRES);
87 userMatSeries.resize(MAXMAT); // Maximum number of matrices
88 vector<Matrix>::iterator firstM = userMatSeries.begin();
89 vector<Matrix>::iterator lastM = userMatSeries.end();
91 while(firstM != lastM)
93 firstM->resize(NUMRES * NUMRES);
97 // Maybe I should put this in with the other xref intialisations!
98 AAXrefseries.resize(MAXMAT);
99 vector<Xref>::iterator firstX = AAXrefseries.begin();
100 vector<Xref>::iterator lastX = AAXrefseries.end();
102 while(firstX != lastX)
104 firstX->resize(NUMRES + 1);
110 matrixName = new string("gonnet");
112 DNAMatrixName = new string("iub");
114 pwMatrixName = new string("gonnet");
116 pwDNAMatrixName = new string("iub");
118 catch(const exception &ex)
120 cerr << ex.what() << endl;
121 cerr << "Terminating program. Cannot continue\n";
126 void SubMatrix::setValuesToDefault()
129 QTDNAHistMatNum = DNAIUB;
130 QTAAHistMatNum = AAHISTGONNETPAM250;
131 QTsegmentDNAMatNum = DNAIUB;
132 QTsegmentAAMatNum = QTAASEGGONNETPAM250;
141 * The destructor frees up any dynamically allocated memory.
143 SubMatrix::~SubMatrix()
145 delete blosum30mtVec;
146 delete blosum40mtVec;
147 delete blosum45mtVec;
148 delete blosum62mt2Vec;
149 delete blosum80mtVec;
161 delete clustalvdnamtVec;
162 delete swgapdnamtVec;
164 delete DNAMatrixName;
166 delete pwDNAMatrixName;
170 * This function sets up the initial cross references.
172 void SubMatrix::setUpCrossReferences()
176 defaultAAXref.resize(NUMRES + 1);
177 defaultDNAXref.resize(NUMRES + 1);
179 string aminoAcidOrder = "ABCDEFGHIKLMNPQRSTVWXYZ";
180 string nucleicAcidOrder = "ABCDGHKMNRSTUVWXY";
182 * I also need to resize the user defined xrefs.
184 DNAXref.resize(NUMRES + 1);
185 AAXref.resize(NUMRES + 1);
186 pwAAXref.resize(NUMRES + 1);
187 pwDNAXref.resize(NUMRES + 1);
188 QTscoreXref.resize(NUMRES + 1);
189 QTscoreDNAXref.resize(NUMRES + 1);
190 QTsegmentDNAXref.resize(NUMRES + 1);
191 QTsegmentAAXref.resize(NUMRES + 1);
193 * set up cross-reference for default matrices hard-coded in matrices.h
195 for (i = 0; i < NUMRES; i++)
197 defaultAAXref[i] = -1;
199 for (i = 0; i < NUMRES; i++)
201 defaultDNAXref[i] = -1;
206 for (i = 0; (c1 = aminoAcidOrder[i]); i++)
208 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
212 defaultAAXref[i] = j;
218 if ((defaultAAXref[i] == - 1) && (aminoAcidOrder[i] != '*'))
220 utilityObject->error("residue %c in matrices.h is not recognised",
226 for (i = 0; (c1 = nucleicAcidOrder[i]); i++)
228 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
232 defaultDNAXref[i] = j;
237 if ((defaultDNAXref[i] == - 1) && (nucleicAcidOrder[i] != '*'))
239 utilityObject->error("nucleic acid %c in matrices.h is not recognised",
240 nucleicAcidOrder[i]);
247 * The function getPairwiseMatrix is called by the user to get the correct sub matrix for the
248 * pairwise alignment stage. It calls getMatrix to actually calculate the matrix.
249 * This function provides an interface for the user. It allows the user to get the matrix
250 * they wish to use for the pairwise alignment part.
256 int SubMatrix::getPairwiseMatrix(int matrix[NUMRES][NUMRES], PairScaleValues& scale,
259 int _maxRes; // Local copy.
260 /* Pointers to Matrix and xref to use in calculation */
265 if(logObject && DEBUGLOG)
267 logObject->logMsg("In the function getPairwiseMatrix: \n");
271 string matrixPointer;
277 scale.intScale = 100;
279 scale.gapOpenScale = scale.gapExtendScale = 1.0;
281 if (userParameters->getDNAFlag())
284 if(logObject && DEBUGLOG)
286 string msg = " (DNA AND Pairwise) " + *pwDNAMatrixName + "\n";
287 logObject->logMsg(msg);
290 if (pwDNAMatrixName->compare("iub") == 0)
292 matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
293 _matPtr = swgapdnamtVec;
294 _matXref = &defaultDNAXref;
296 else if (pwDNAMatrixName->compare("clustalw") == 0)
298 matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
299 _matPtr = clustalvdnamtVec;
300 _matXref = &defaultDNAXref;
301 scale.gapOpenScale = 0.6667;
302 scale.gapExtendScale = 0.751;
306 matrixPointer = "pwUserDNAMat"; xrefPointer = "pwDNAXref";
307 _matPtr = &pwUserDNAMat;
308 _matXref = &pwDNAXref;
310 _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
316 float _transitionWeight = userParameters->getTransitionWeight();
317 // Mark change 17-5-07
318 matrix[0][4] = static_cast<int>(_transitionWeight * matrix[0][0]);
319 matrix[4][0] = static_cast<int>(_transitionWeight * matrix[0][0]);
320 matrix[2][11] = static_cast<int>(_transitionWeight * matrix[0][0]);
321 matrix[11][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
322 matrix[2][12] = static_cast<int>(_transitionWeight * matrix[0][0]);
323 matrix[12][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
329 if(logObject && DEBUGLOG)
331 string msg = " (Protein AND Pairwise) " + *pwMatrixName + "\n";
332 logObject->logMsg(msg);
336 if (pwMatrixName->compare("blosum") == 0)
338 matrixPointer = "blosum30mtVec"; xrefPointer = "defaultAAXref";
339 _matPtr = blosum30mtVec;
340 _matXref = &defaultAAXref;
342 else if (pwMatrixName->compare("pam") == 0)
344 matrixPointer = "pam350mtVec"; xrefPointer = "defaultAAXref";
345 _matPtr = pam350mtVec;
346 _matXref = &defaultAAXref;
348 else if (pwMatrixName->compare("gonnet") == 0)
350 matrixPointer = "gon250mtVec"; xrefPointer = "defaultAAXref";
351 _matPtr = gon250mtVec;
352 scale.intScale /= 10;
353 _matXref = &defaultAAXref;
355 else if (pwMatrixName->compare("id") == 0)
357 matrixPointer = "idmatVec"; xrefPointer = "defaultAAXref";
359 _matXref = &defaultAAXref;
363 matrixPointer = "pwUserMat"; xrefPointer = "pwAAXref";
364 _matPtr = &pwUserMat;
365 _matXref = &pwAAXref;
368 _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
377 if(logObject && DEBUGLOG)
380 outs << " Called getMatrix with "
381 << matrixPointer << " and " << xrefPointer << ".\n"
382 << " intScale = " << scale.intScale << ", gapOpenScale = "
383 << scale.gapOpenScale << ", gapExtendScale = " << scale.gapExtendScale
385 logObject->logMsg(outs.str());
388 matAvg = matrixAvgScore;
393 * The function getProfileAlignMatrix provides an interface for the user to get
394 * the matrix to be used in the profile alignment. This depends on the matrix series
395 * that was chosen, and also on the percent identity.
403 int SubMatrix::getProfileAlignMatrix(int matrix[NUMRES][NUMRES], double pcid, int minLen,
404 PrfScaleValues& scaleParam, int& matAvg)
407 bool errorGiven = false;
408 bool _negMatrix = userParameters->getUseNegMatrix();
411 scaleParam.intScale = 100;
412 string matrixPointer;
416 if(logObject && DEBUGLOG)
418 logObject->logMsg("In the function getProfileAlignMatrix: \n");
421 if (userParameters->getDNAFlag())
424 if(logObject && DEBUGLOG)
427 outs << " (DNA AND Multiple align) "<< DNAMatrixName->c_str() << "\n";
428 logObject->logMsg(outs.str());
431 scaleParam.scale = 1.0;
432 if (DNAMatrixName->compare("iub") == 0)
434 _matPtr = swgapdnamtVec;
435 _matXref = &defaultDNAXref;
436 matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
438 else if (DNAMatrixName->compare("clustalw") == 0)
440 _matPtr = clustalvdnamtVec;
441 _matXref = &defaultDNAXref;
442 scaleParam.scale = 0.66;
443 matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
447 _matPtr = &userDNAMat;
449 matrixPointer = "userDNAMat"; xrefPointer = "DNAXref";
452 _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
453 static_cast<int>(scaleParam.intScale)); // Mark change 17-5-07
459 float _transitionWeight = userParameters->getTransitionWeight();
460 // fix suggested by Chanan Rubin at Compugen
461 matrix[(*_matXref)[0]][(*_matXref)[4]] =
462 static_cast<int>(_transitionWeight * matrix[0][0]);
463 matrix[(*_matXref)[4]][(*_matXref)[0]] =
464 static_cast<int>(_transitionWeight * matrix[0][0]);
465 matrix[(*_matXref)[2]][(*_matXref)[11]] =
466 static_cast<int>(_transitionWeight * matrix[0][0]);
467 matrix[(*_matXref)[11]][(*_matXref)[2]] =
468 static_cast<int>(_transitionWeight * matrix[0][0]);
469 matrix[(*_matXref)[2]][(*_matXref)[12]] =
470 static_cast<int>(_transitionWeight * matrix[0][0]);
471 matrix[(*_matXref)[12]][(*_matXref)[2]] =
472 static_cast<int>(_transitionWeight * matrix[0][0]);
475 else // Amino acid alignment!!!!
478 if(logObject && DEBUGLOG)
481 outs << " (Protein AND Multiple align) "<< matrixName->c_str() << "\n";
482 logObject->logMsg(outs.str());
486 scaleParam.scale = 0.75;
487 if (matrixName->compare("blosum") == 0)
489 scaleParam.scale = 0.75;
490 if (_negMatrix || userParameters->getDistanceTree() == false)
492 _matPtr = blosum40mtVec;
493 matrixPointer = "blosum40mtVec";
495 else if (pcid > 80.0)
497 _matPtr = blosum80mtVec;
498 matrixPointer = "blosum80mtVec";
500 else if (pcid > 60.0)
502 _matPtr = blosum62mt2Vec;
503 matrixPointer = "blosum62mt2Vec";
505 else if (pcid > 40.0)
507 _matPtr = blosum45mtVec;
508 matrixPointer = "blosum45mtVec";
510 else if (pcid > 30.0)
512 scaleParam.scale = 0.5;
513 _matPtr = blosum45mtVec;
514 matrixPointer = "blosum45mtVec";
516 else if (pcid > 20.0)
518 scaleParam.scale = 0.6;
519 _matPtr = blosum45mtVec;
520 matrixPointer = "blosum45mtVec";
524 scaleParam.scale = 0.6;
525 _matPtr = blosum30mtVec;
526 matrixPointer = "blosum30mtVec";
528 _matXref = &defaultAAXref;
529 xrefPointer = "defaultAAXref";
531 else if (matrixName->compare("pam") == 0)
533 scaleParam.scale = 0.75;
534 if (_negMatrix || userParameters->getDistanceTree() == false)
536 _matPtr = pam120mtVec;
537 matrixPointer = "pam120mtVec";
539 else if (pcid > 80.0)
541 _matPtr = pam20mtVec;
542 matrixPointer = "pam20mtVec";
544 else if (pcid > 60.0)
546 _matPtr = pam60mtVec;
547 matrixPointer = "pam60mtVec";
549 else if (pcid > 40.0)
551 _matPtr = pam120mtVec;
552 matrixPointer = "pam120mtVec";
556 _matPtr = pam350mtVec;
557 matrixPointer = "pam350mtVec";
559 _matXref = &defaultAAXref;
560 xrefPointer = "defaultAAXref";
562 else if (matrixName->compare("gonnet") == 0)
564 scaleParam.scale /= 2.0;
565 if (_negMatrix || userParameters->getDistanceTree() == false)
567 _matPtr = gon250mtVec;
568 matrixPointer = "gon250mtVec";
570 else if (pcid > 35.0)
572 _matPtr = gon80mtVec;
573 scaleParam.scale /= 2.0;
574 matrixPointer = "gon80mtVec";
576 else if (pcid > 25.0)
580 _matPtr = gon250mtVec;
581 matrixPointer = "gon250mtVec";
585 _matPtr = gon120mtVec;
586 matrixPointer = "gon120mtVec";
593 _matPtr = gon350mtVec;
594 matrixPointer = "gon350mtVec";
598 _matPtr = gon160mtVec;
599 matrixPointer = "gon160mtVec";
602 _matXref = &defaultAAXref;
603 xrefPointer = "defaultAAXref";
604 scaleParam.intScale /= 10;
606 else if (matrixName->compare("id") == 0)
609 _matXref = &defaultAAXref;
610 xrefPointer = "defaultAAXref";
611 matrixPointer = "idmatVec";
617 for (i = 0; i < matSeries.nmat; i++)
619 if (pcid >= matSeries.mat[i].llimit && pcid <=
620 matSeries.mat[i].ulimit)
631 utilityObject->warning(
632 "\nSeries matrix not found for sequence percent identity = %d.\n""(Using first matrix in series as a default.)\n""This alignment may not be optimal!\n""SUGGESTION: Check your matrix series input file and try again.", (int)pcid);
638 _matPtr = matSeries.mat[j].matptr;
639 _matXref = matSeries.mat[j].AAXref;
640 // this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit
641 scaleParam.scale = 0.5 + (pcid - matSeries.mat[j].llimit) / (
642 (matSeries.mat[j].ulimit - matSeries.mat[j].llimit) *2.0);
643 xrefPointer = "matSeries.mat[j].AAXref";
644 matrixPointer = "matSeries.mat[j].matptr";
650 xrefPointer = "AAXref";
651 matrixPointer = "userMat";
654 _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
655 static_cast<int>(scaleParam.intScale));
658 cerr << "Error: matrix " << matrixName << " not found\n";
664 if(logObject && DEBUGLOG)
667 outs << " Called getMatrix with "
668 << matrixPointer << " and " << xrefPointer << ".\n"
669 << " intScale = " << scaleParam.intScale << ", scale = "
670 << scaleParam.scale << ", pcid = " << pcid << "\n\n";
671 logObject->logMsg(outs.str());
675 matAvg = matrixAvgScore;
680 * The function getMatrix is what the other parts of the code call to get a useable
681 * substitution matrix. This is stored in matrix[NUMRES][NUMRES].
689 int SubMatrix::getMatrix(Matrix* matptr, Xref* xref, int matrix[NUMRES][NUMRES],
690 bool negFlag, int scale, bool minimise)
697 int av1, av2, av3, min, max;
699 for (i = 0; i < NUMRES; i++)
701 for (j = 0; j < NUMRES; j++)
709 for (i = 0; i <= userParameters->getMaxAA(); i++)
712 for (j = 0; j <= i; j++)
715 if ((ti != - 1) && (tj != - 1))
720 matrix[ti][ti] = k * scale;
725 matrix[ti][tj] = k * scale;
726 matrix[tj][ti] = k * scale;
736 for (i = 0; i <= userParameters->getMaxAA(); i++)
738 for (j = 0; j <= i; j++)
752 av1 /= (maxRes * maxRes) / 2;
754 av3 = static_cast<int>(av3 / (((float)(maxRes * maxRes - maxRes)) / 2));
755 matrixAvgScore = - av3;
757 min = max = matrix[0][0];
758 for (i = 0; i <= userParameters->getMaxAA(); i++)
759 for (j = 1; j <= i; j++)
761 if (matrix[i][j] < min)
765 if (matrix[i][j] > max)
770 //cout << "MAX = " << max << "\n";
774 if requested, make a positive matrix - add -(lowest score) to every entry
776 if (negFlag == false)
780 for (i = 0; i <= userParameters->getMaxAA(); i++)
785 for (j = 0; j <= userParameters->getMaxAA(); j++)
791 matrix[ti][tj] -= min;
799 // local copies of the gap positions
800 int _gapPos1 = userParameters->getGapPos1();
801 int _gapPos2 = userParameters->getGapPos2();
803 for (i = 0; i < userParameters->getGapPos1(); i++)
805 matrix[i][_gapPos1] = grScore;
806 matrix[_gapPos1][i] = grScore;
807 matrix[i][_gapPos2] = grScore;
808 matrix[_gapPos2][i] = grScore;
810 matrix[_gapPos1][_gapPos1] = ggScore;
811 matrix[_gapPos2][_gapPos2] = ggScore;
812 matrix[_gapPos2][_gapPos1] = ggScore;
813 matrix[_gapPos1][_gapPos2] = ggScore;
817 // DO THE SAGA MATRIX
818 for (i = 0; i <= userParameters->getMaxAA(); i++)
820 for (j = 0; j <= userParameters->getMaxAA(); j++)
822 matrix[i][j] = max - matrix[i][j];
832 * The function getUserMatFromFile is used to read in a user defined matrix.
834 * @param alignResidueType
838 bool SubMatrix::getUserMatFromFile(char *str, int alignResidueType, int alignType)
843 // Need to check if the values are a valid combination!
844 checkResidueAndAlignType(alignResidueType, alignType);
846 if(userParameters->getMenuFlag())
848 utilityObject->getStr(string("Enter name of the matrix file"), line2);
855 if(line2.size() == 0)
860 if((infile = fopen(line2.c_str(), "r")) == NULL)
862 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
866 strcpy(str, line2.c_str());
867 // Find out which part of the code we are reading the matrix in for.
868 mat = getUserMatAddress(alignResidueType, alignType);
869 xref = getUserXrefAddress(alignResidueType, alignType);
871 if ((alignResidueType == Protein) && (alignType == MultipleAlign))
873 // Try read in a matrix series.
874 maxRes = readMatrixSeries(str, userMat, AAXref);
876 else // Read in a single matrix!!!
878 maxRes = readUserMatrix(str, *mat, *xref);
881 if (maxRes <= 0) return false;
887 * The function compareMatrices is used to compare 2 matrices that have been read in.
888 * It will compare them in a given region. It will not compare all of them, as some of it
889 * will be random memory.
893 void SubMatrix::compareMatrices(int mat1[NUMRES][NUMRES], int mat2[NUMRES][NUMRES])
896 for(int row = 0; row < NUMRES; row++)
898 for(int col = 0; col < NUMRES; col++)
900 if(mat1[row][col] != mat2[row][col])
903 cout << "The row is " << row << ". The column is " << col << endl;
904 break; // It is not the same. End the loop.
911 cout << "It was not the same\n";
915 cout << "It is the same\n";
920 * This function is simply to display the results of the getMatrix function.
921 * This is so that I can compare it to the original clustal version of it.
924 void SubMatrix::printGetMatrixResults(int mat[NUMRES][NUMRES])
926 ofstream outfile("getmatrix.out");
929 cerr<<"oops failed to open !!!\n";
931 for(int row = 0; row < NUMRES; row++)
933 for(int col = 0; col < NUMRES; col++)
935 if((mat[row][col] > 9) || (mat[row][col] < 0))
937 outfile <<" "<< mat[row][col]<<",";
941 outfile <<" "<< mat[row][col]<<",";
949 * This function is from the old interface. It simply calls the readMatrixSeries
950 * function. It seems to only be called from the commandline part.
954 bool SubMatrix::getUserMatSeriesFromFile(char *str)
960 if(userParameters->getMenuFlag()) // Check if we are using menu!
962 utilityObject->getStr(string("Enter name of the matrix file"), line2);
970 if(line2.size() == 0) return false;
972 if((infile = fopen(line2.c_str(), "r"))==NULL)
974 utilityObject->error("Cannot find matrix file [%s]",line2.c_str());
978 strcpy(str, line2.c_str());
980 maxRes = readMatrixSeries(str, userMat, AAXref);
981 if (maxRes <= 0) return false;
987 * The function readMatrixSeries is used to read in a series of matrices from a file.
988 * It calls readUserMatrix to read in the individual matrices. The matrices are stored
991 int SubMatrix::readMatrixSeries(const char *fileName, Matrix& userMat, Xref& xref)
994 char mat_fileName[FILENAMELEN];
998 int n, llimit, ulimit;
1000 if (fileName[0] == '\0')
1002 utilityObject->error("comparison matrix not specified");
1005 if ((fd = fopen(fileName, "r")) == NULL)
1007 utilityObject->error("cannot open %s", fileName);
1011 /* check the first line to see if it's a series or a single matrix */
1012 while (fgets(inline1, 1024, fd) != NULL)
1014 if (commentline(inline1))
1018 if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1029 /* it's a single matrix */
1030 if (userSeries == false)
1033 maxRes = readUserMatrix(fileName, userMat, xref);
1037 /* it's a series of matrices, find the next MATRIX line */
1040 while (fgets(inline1, 1024, fd) != NULL)
1042 if (commentline(inline1))
1046 if (utilityObject->lineType(inline1, "MATRIX")) // Have found a matrix
1048 if (sscanf(inline1 + 6, "%d %d %s", &llimit, &ulimit, mat_fileName)
1051 utilityObject->error("Bad format in file %s\n", fileName);
1055 if (llimit < 0 || llimit > 100 || ulimit < 0 || ulimit > 100)
1057 utilityObject->error("Bad format in file %s\n", fileName);
1061 if (ulimit <= llimit)
1063 utilityObject->error("in file %s: lower limit is greater than upper (%d-%d)\n",
1064 fileName, llimit, ulimit);
1069 n = readUserMatrix(mat_fileName, userMatSeries[nmat],
1070 AAXrefseries[nmat]);
1072 //cout << "Read in matrix number " << nmat << "\n"; // NOTE Testing!!!!!
1073 char nameOfFile[] = "matrix";
1074 printInFormat(userMatSeries[nmat], nameOfFile);
1077 utilityObject->error("Bad format in matrix file %s\n", mat_fileName);
1081 matSeries.mat[nmat].llimit = llimit;
1082 matSeries.mat[nmat].ulimit = ulimit;
1083 matSeries.mat[nmat].matptr = &userMatSeries[nmat];
1084 matSeries.mat[nmat].AAXref = &AAXrefseries[nmat];
1089 // We have read in all the matrices that we can read into this vector
1090 // Write a message to the screen, and break out of loop.
1091 cerr << "The matrix series file has more entries than allowed in \n"
1092 << "a user defined series. The most that are allowed is "
1094 << "The first " << MAXMAT << " have been read in and will be used.\n";
1095 break; // Get out of the loop!
1100 matSeries.nmat = nmat;
1107 * This function is used to read a single user matrix from a file.
1108 * It can be called repeatedly if there are multiple matrices in the file.
1110 int SubMatrix::readUserMatrix(const char *fileName, Matrix& userMat, Xref& xref)
1118 char *args[NUMRES + 4];
1124 if (fileName[0] == '\0')
1126 utilityObject->error("comparison matrix not specified");
1130 if ((fd = fopen(fileName, "r")) == NULL)
1132 utilityObject->error("cannot open %s", fileName);
1136 while (fgets(inline1, 1024, fd) != NULL)
1138 if (commentline(inline1))
1142 if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1144 utilityObject->error("in %s - single matrix expected.", fileName);
1149 read residue characters.
1152 for (j = 0; j < (int)strlen(inline1); j++)
1154 if (isalpha((int)inline1[j]))
1156 codes[k++] = inline1[j];
1160 utilityObject->error("too many entries in matrix %s", fileName);
1171 utilityObject->error("wrong format in matrix %s", fileName);
1177 cross-reference the residues
1179 for (i = 0; i < NUMRES; i++)
1185 for (i = 0; (c1 = codes[i]); i++)
1187 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
1194 if ((xref[i] == - 1) && (codes[i] != '*'))
1196 utilityObject->warning("residue %c in matrix %s not recognised", codes[i],
1207 while (fgets(inline1, 1024, fd) != NULL)
1209 if (inline1[0] == '\n')
1213 if (inline1[0] == '#' || inline1[0] == '!')
1217 numargs = getArgs(inline1, args, (int)(k + 1));
1218 if (numargs < maxRes)
1220 utilityObject->error("wrong format in matrix %s", fileName);
1224 if (isalpha(args[0][0]))
1233 /* decide whether the matrix values are float or decimal */
1235 for (i = 0; i < (int)strlen(args[farg]); i++)
1236 if (args[farg][i] == '.')
1238 /* we've found a float value */
1243 for (i = 0; i <= ix; i++)
1247 f = atof(args[i + farg]);
1248 userMat[ix1++] = (short)(f *scale);
1255 utilityObject->error("wrong format in matrix %s", fileName);
1260 userMat.resize(ix1 + 1);
1275 int SubMatrix::getArgs(char *inline1,char *args[],int max)
1281 for (i = 0; i <= max; i++)
1283 if ((args[i] = strtok(inptr, " \t\n")) == NULL)
1298 int SubMatrix::getMatrixNum()
1307 int SubMatrix::getDNAMatrixNum()
1309 return DNAMatrixNum;
1316 int SubMatrix::getPWMatrixNum()
1325 int SubMatrix::getPWDNAMatrixNum()
1327 return pwDNAMatrixNum;
1331 * The function setCurrentNameAndNum is used to select a matrix series.
1332 * This will then be used for the alignment. The matrices will change, but the
1333 * series remains the same. We can set the series for pairwise/full for both
1334 * protein and DNA. NOTE: The default can be set to user defined matrices.
1335 * @param _matrixName
1337 * @param alignResidueType
1340 void SubMatrix::setCurrentNameAndNum(string _matrixName, int _matrixNum,
1341 int alignResidueType,int alignType)
1343 // Check if the values are valid.
1344 checkResidueAndAlignType(alignResidueType, alignType);
1348 if((alignResidueType == Protein) && (alignType == Pairwise))
1350 residue = "Protein"; align = "Pairwise";
1351 pwMatrixNum = _matrixNum;
1352 pwMatrixName = new string(_matrixName);
1354 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1356 residue = "Protein"; align = "MultipleAlign";
1357 matrixNum = _matrixNum;
1358 matrixName = new string(_matrixName);
1360 else if((alignResidueType == DNA) && (alignType == Pairwise))
1362 residue = "DNA"; align = "Pairwise";
1363 pwDNAMatrixNum = _matrixNum;
1364 pwDNAMatrixName = new string(_matrixName);
1366 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1368 residue = "DNA"; align = "MultipleAlign";
1369 DNAMatrixNum = _matrixNum;
1370 DNAMatrixName = new string(_matrixName);
1375 if(logObject && DEBUGLOG)
1378 outs << "The matrix/matrix series has been changed for "
1379 << "(" << residue << " AND " << align << ")."
1380 << " New value: " << _matrixName << "\n\n";
1381 logObject->logMsg(outs.str());
1388 * @param alignResidueType
1392 int SubMatrix::getMatrixNumForMenu(int alignResidueType, int alignType)
1394 checkResidueAndAlignType(alignResidueType, alignType);
1396 if((alignResidueType == Protein) && (alignType == Pairwise))
1400 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1404 else if((alignResidueType == DNA) && (alignType == Pairwise))
1406 return pwDNAMatrixNum;
1408 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1410 return DNAMatrixNum;
1413 return -100; // NOTE NONE of these. I need to put in better error checking
1421 bool SubMatrix::commentline(char* line)
1429 for (i = 0; line[i] != '\n' && line[i] != EOS; i++)
1431 if (!isspace(line[i]))
1440 * This function prints out the vector to the file specified by name.
1441 * The vector is printed out in a triangular format, the same as the way
1442 * the arrays are displayed in matrices.h. This function is for testing purposes.
1446 void SubMatrix::printInFormat(vector<short>& temp, char* name)
1448 char nameOfFile[30];
1449 strcpy(nameOfFile, name);
1450 strcat(nameOfFile, ".out");
1452 ofstream outfile(nameOfFile);
1455 cerr<<"oops failed to open !!!\n";
1457 outfile<<"short "<<name<<"[]{\n";
1459 int numOnCurrentLine = 1;
1461 for(int i = 0; i < (int)temp.size(); i++)
1463 if(soFar == numOnCurrentLine)
1469 if((temp[i] > 9) || (temp[i] < 0))
1471 outfile <<" "<< temp[i]<<",";
1475 outfile <<" "<< temp[i]<<",";
1478 // Now increment so far
1480 // Check to see if the next element is the last element
1481 if((i + 1) == (int)temp.size() - 1)
1483 // Print out the last element. Then a curly brace, and break.
1484 if((temp[i+1] > 9) || (temp[i+1] < 0))
1486 outfile <<" "<< temp[i + 1]<<"};\n";
1490 outfile <<" "<< temp[i + 1]<<"};\n";
1496 ofstream outfile2("temp.out");
1497 for(int i = 0; i < (int)temp.size(); i++)
1499 outfile2 << temp[i] << " ";
1509 void SubMatrix::printVectorToFile(vector<short>& temp, char* name)
1511 char nameOfFile[30];
1512 strcpy(nameOfFile, name);
1513 strcat(nameOfFile, ".out");
1515 ofstream outfile(nameOfFile);
1518 cerr<<"oops failed to open !!!\n";
1520 for(int i = 0; i < (int)temp.size(); i++)
1522 if((temp[i] > 9) || (temp[i] < 0))
1524 outfile <<" "<< temp[i]<<",";
1528 outfile <<" "<< temp[i]<<",";
1537 * @param alignResidueType
1541 Matrix* SubMatrix::getUserMatAddress(int alignResidueType, int alignType)
1543 if((alignResidueType == Protein) && (alignType == Pairwise))
1547 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1551 else if((alignResidueType == DNA) && (alignType == Pairwise))
1553 return &pwUserDNAMat;
1555 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1565 * @param alignResidueType
1569 Xref* SubMatrix::getUserXrefAddress(int alignResidueType, int alignType)
1571 if((alignResidueType == Protein) && (alignType == Pairwise))
1575 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1579 else if((alignResidueType == DNA) && (alignType == Pairwise))
1583 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1591 * This is an error handling routine. If an incorrect combination of values
1592 * is given, it will terminate the program.
1593 * @param alignResidueType
1596 void SubMatrix::checkResidueAndAlignType(int alignResidueType, int alignType)
1598 if(((alignResidueType != 0) && (alignResidueType != 1))
1599 || ((alignType != 0) && (alignType != 1)))
1601 InvalidCombination ex(alignResidueType, alignType);
1608 * The function tempInterface is used to call the SubMatrix in the way it is
1609 * supposed to be used. It is for testing purposes.
1610 * @param alignResidueType
1613 void SubMatrix::tempInterface(int alignResidueType, int alignType)
1615 char userFile[FILENAMELEN + 1];
1617 userParameters->setDNAFlag(true);
1618 strcpy(userFile, "mat1");
1619 userParameters->setMenuFlag(false);
1620 getUserMatFromFile(userFile, DNA, Pairwise);
1621 setCurrentNameAndNum(userFile, 4, 3, Pairwise);
1623 setCurrentNameAndNum("gonnet", 4, Protein, Pairwise);
1627 * A single matrix is used in scoring the alignment. This is Blosum45. This is the
1628 * function to get it.
1632 int SubMatrix::getAlnScoreMatrix(int matrix[NUMRES][NUMRES])
1636 //_maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1637 _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, false, 1, true);
1638 //_maxNumRes = getMatrix(blosum62mt2Vec, &defaultAAXref, matrix, true, 100);
1641 _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1647 * This function is used to get the matrix that will be used for the calculation of
1648 * the histogram. The histogram values are used in ClustalQt.
1653 void SubMatrix::getQTMatrixForHistogram(int matrix[NUMRES][NUMRES])
1655 Matrix* _matPtrLocal;
1656 Xref* _matXrefLocal;
1658 if(userParameters->getDNAFlag())
1660 if (QTDNAHistMatNum == DNAUSERDEFINED)
1662 _matPtrLocal = &QTscoreUserDNAMatrix;
1663 _matXrefLocal = &QTscoreDNAXref;
1665 else if (QTDNAHistMatNum == DNACLUSTALW)
1667 _matPtrLocal = clustalvdnamtVec;
1668 _matXrefLocal = &defaultDNAXref;
1672 _matPtrLocal = swgapdnamtVec;
1673 _matXrefLocal = &defaultDNAXref;
1678 if (QTAAHistMatNum == AAHISTIDENTITY)
1680 _matPtrLocal = idmatVec;
1681 _matXrefLocal = &defaultAAXref;
1683 else if (QTAAHistMatNum == AAHISTGONNETPAM80)
1685 _matPtrLocal = gon80mtVec;
1686 _matXrefLocal = &defaultAAXref;
1688 else if (QTAAHistMatNum == AAHISTGONNETPAM120)
1690 _matPtrLocal = gon120mtVec;
1691 _matXrefLocal = &defaultAAXref;
1693 else if (QTAAHistMatNum == AAHISTUSER)
1695 _matPtrLocal = &QTscoreUserMatrix;
1696 _matXrefLocal = &QTscoreXref;
1698 else if (QTAAHistMatNum == AAHISTGONNETPAM350)
1700 _matPtrLocal = gon350mtVec;
1701 _matXrefLocal = &defaultAAXref;
1705 _matPtrLocal = gon250mtVec;
1706 _matXrefLocal = &defaultAAXref;
1709 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1713 void SubMatrix::getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES])
1715 Matrix* _matPtrLocal;
1716 Xref* _matXrefLocal;
1718 int _maxAA = userParameters->getMaxAA();
1722 if(userParameters->getDNAFlag())
1724 if (QTsegmentDNAMatNum == DNAUSERDEFINED)
1726 _matPtrLocal = &QTsegmentDNAMatrix;
1727 _matXrefLocal = &QTsegmentDNAXref;
1729 else if (QTsegmentDNAMatNum == DNACLUSTALW)
1731 _matPtrLocal = clustalvdnamtVec;
1732 _matXrefLocal = &defaultDNAXref;
1736 _matPtrLocal = swgapdnamtVec;
1737 _matXrefLocal = &defaultDNAXref;
1739 /* get a positive matrix - then adjust it according to scale */
1740 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1741 /* find the maximum value */
1742 for(int i = 0; i <= _maxAA; i++)
1744 for(int j = 0; j <= _maxAA; j++)
1746 if(matrix[i][j] > max)
1752 /* subtract max * scale / 2 from each matrix value */
1753 offset = static_cast<int>(static_cast<float>(max *
1754 userParameters->getQTlowScoreDNAMarkingScale()) / 20.0);
1756 for(int i = 0; i <= _maxAA; i++)
1758 for(int j = 0; j <= _maxAA; j++)
1760 matrix[i][j] -= offset;
1766 if (QTsegmentAAMatNum == QTAASEGGONNETPAM80)
1768 _matPtrLocal = gon80mtVec;
1769 _matXrefLocal = &defaultAAXref;
1771 else if (QTsegmentAAMatNum == QTAASEGGONNETPAM120)
1773 _matPtrLocal = gon120mtVec;
1774 _matXrefLocal = &defaultAAXref;
1776 else if (QTsegmentAAMatNum == QTAASEGUSER)
1778 _matPtrLocal = &QTsegmentAAMatrix;
1779 _matXrefLocal = &QTsegmentAAXref;
1781 else if (QTsegmentAAMatNum == QTAASEGGONNETPAM350)
1783 _matPtrLocal = gon350mtVec;
1784 _matXrefLocal = &defaultAAXref;
1788 _matPtrLocal = gon250mtVec;
1789 _matXrefLocal = &defaultAAXref;
1791 /* get a negative matrix */
1792 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, true, 100);
1796 bool SubMatrix::getQTLowScoreMatFromFile(char* fileName, bool dna)
1802 line2 = string(fileName);
1804 if(line2.size() == 0)
1809 if((infile = fopen(line2.c_str(), "r")) == NULL)
1811 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1815 strcpy(fileName, line2.c_str());
1819 maxRes = readUserMatrix(fileName, QTsegmentDNAMatrix, QTsegmentDNAXref);
1823 maxRes = readUserMatrix(fileName, QTsegmentAAMatrix, QTsegmentAAXref);
1834 bool SubMatrix::getAAScoreMatFromFile(char *str)
1840 line2 = string(str);
1842 if(line2.size() == 0)
1847 if((infile = fopen(line2.c_str(), "r")) == NULL)
1849 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1853 strcpy(str, line2.c_str());
1855 maxRes = readUserMatrix(str, QTscoreUserMatrix, QTscoreXref);
1865 bool SubMatrix::getDNAScoreMatFromFile(char *str)
1871 line2 = string(str);
1873 if(line2.size() == 0)
1878 if((infile = fopen(line2.c_str(), "r")) == NULL)
1880 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1884 strcpy(str, line2.c_str());
1886 maxRes = readUserMatrix(str, QTscoreUserDNAMatrix, QTscoreDNAXref);