4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
10 * process the name vector.
12 * 05-03-07,Nigel Brown(EMBL): modified searchForString() to skip over gaps in
13 * sequence while matching.
15 * 26-03-07,Nigel Brown(EMBL): suppressed error message box for name clashes;
16 * added testUniqueNames() predicate, which compares new sequence names with
17 * those in the alignment vector BEFORE appending them.
25 #include "Alignment.h"
32 Alignment::Alignment()
33 : gapPos1(userParameters->getGapPos1()),
34 gapPos2(userParameters->getGapPos2())
38 maxAlignmentLength = 0;
39 lengthLongestSequence = 0;
43 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
44 /** remove gaps from older alignments (code = gap_pos1) */
45 void Alignment::resetProfile1()
47 register int sl; /* which have code = gap_pos2 */
49 int _gapPos1 = userParameters->getGapPos1();
50 int _gapPos2 = userParameters->getGapPos2();
51 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
52 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
54 if (userParameters->getStructPenalties1() != NONE)
57 for (j = 0; j < (int)gapPenaltyMask1.size(); ++j)
59 if (gapPenaltyMask1[j] == _gapPos1 && (_resetAlignNew ||
64 if (gapPenaltyMask1[j] == _gapPos2 && (_resetAlignAll))
68 gapPenaltyMask1[sl] = gapPenaltyMask1[j];
73 if (userParameters->getStructPenalties1() == SECST)
76 for (j = 0; j < (int)secStructMask1.size(); ++j)
78 if (secStructMask1[j] == _gapPos1 && (_resetAlignNew ||
83 if (secStructMask1[j] == _gapPos2 && (_resetAlignAll))
87 secStructMask1[sl] = secStructMask1[j];
92 for(i = 1; i <= profile1NumSeqs; ++i)
95 for(j = 1; j <= getSeqLength(i); ++j)
97 if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
101 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
106 seqArray[i][sl] = seqArray[i][j];
109 // Andreas Wilm (UCD): added 2008-03-07:
110 // Remove excess bit at end of sequence
111 int numExtraElements = seqArray[i].size() - 1 - sl;
112 for(int k = 0; k < numExtraElements; k++)
114 seqArray[i].pop_back();
119 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
120 void Alignment::resetProfile2()
122 register int sl; /* which have code = gap_pos2 */
124 int _gapPos1 = userParameters->getGapPos1();
125 int _gapPos2 = userParameters->getGapPos2();
126 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
127 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
128 int _profile1NumSeqs = profile1NumSeqs;
131 if (userParameters->getStructPenalties2() != NONE)
134 for (j = 0; j < (int)gapPenaltyMask2.size(); ++j)
136 if (gapPenaltyMask2[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
140 if (gapPenaltyMask2[j] == _gapPos2 && (_resetAlignAll))
144 gapPenaltyMask2[sl] = gapPenaltyMask2[j];
149 if (userParameters->getStructPenalties2() == SECST)
152 for (j = 0; j < (int)secStructMask2.size(); ++j)
154 if (secStructMask2[j] == _gapPos1 && (_resetAlignNew ||
159 if (secStructMask2[j] == _gapPos2 && (_resetAlignAll))
163 secStructMask2[sl] = secStructMask2[j];
168 for(i = _profile1NumSeqs + 1; i <= numSeqs; ++i)
171 for(j = 1; j <= getSeqLength(i); ++j)
173 if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
177 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
182 seqArray[i][sl] = seqArray[i][j];
186 // Andreas Wilm (UCD) added 2008-03-07:
187 // Remove excess bit at end of sequence
188 int numExtraElements = seqArray[i].size() - 1 - sl;
189 for(int k = 0; k < numExtraElements; k++)
191 seqArray[i].pop_back();
197 void Alignment::resetAllSeqWeights()
200 seqWeight.resize(numSeqs + 1, 100);
204 * The function addOutputIndex must check that the number of outputindex
205 * Not sure what to do with this one.
207 bool Alignment::addOutputIndex(vector<int>* outputIndexToAdd)
209 // First need to clear the old outputIndex
211 // Check if the size is correct
212 if((int)outputIndexToAdd->size() == numSeqs)
214 // Add the output index
215 outputIndex = *outputIndexToAdd;
221 return false; // Could not add them
227 * The function appendOutputIndex is used when we are appending the outputIndex
228 * to the current one. We do this when we are adding a profile2.
230 bool Alignment::appendOutputIndex(vector<int>* outputIndexToAppend)
232 // Check if the size is correct
233 if((int)(outputIndexToAppend->size() + outputIndex.size()) == numSeqs)
235 // Append the outputIndex
236 vector<int>::iterator outIndexIter = outputIndexToAppend->begin();
237 while(outIndexIter != outputIndexToAppend->end())
239 outputIndex.push_back(*outIndexIter);
242 if((int)outputIndex.size() == numSeqs)
249 cerr << "There is a problem with adding the sequences\n";
261 * The function addSequences takes a vector of sequences and adds it to the Alignment.
262 * This is used if there is nothing in memory that we need.
265 void Alignment::addSequences(vector<Sequence>* seqVector)
269 numSeqs = seqVector->size();
270 vector<int> emptyVec;
272 // NOTE push dummy sequences on first!
273 /********************************************************
274 * It was decided to stay with the seqs and seqArray *
275 * index begining at 1. This is because it is difficult *
276 * to change in the code, so we push dummy seqs on *
277 ********************************************************/
278 seqArray.push_back(emptyVec); // EMPTY sequence
279 names.push_back(string(""));
280 titles.push_back(string(""));
281 sequenceIds.push_back(0);
283 addSequencesToVector(seqVector);
285 calculateMaxLengths();
286 seqWeight.resize(numSeqs + 1, 100);
290 * The function appendSequences is used when we have a profile2.
293 void Alignment::appendSequences(vector<Sequence>* seqVector)
295 numSeqs += seqVector->size(); // Add to the number we already have!
296 addSequencesToVector(seqVector);
298 seqWeight.resize(numSeqs + 1, 100);
299 calculateMaxLengths();
302 void Alignment::pasteSequencesIntoPosition(vector<Sequence>* seqVector, int pos,
303 bool explicitPasteToProfile2)
305 SeqArray::iterator seqArrayIterator;
306 vector<string>::iterator namesIterator;
307 vector<string>::iterator titlesIterator;
308 vector<unsigned long>::iterator sequenceIdsIterator;
310 int profNum = userParameters->getProfileNum();
311 int numSeqsToInsert = seqVector->size();
312 if(numSeqsToInsert == 0 || pos < 0)
318 seqArrayIterator = seqArray.end();
319 namesIterator = names.end();
320 titlesIterator = titles.end();
321 sequenceIdsIterator = sequenceIds.end();
325 seqArrayIterator = seqArray.begin() + pos + 1;
326 namesIterator = names.begin() + pos + 1;
327 titlesIterator = titles.begin() + pos + 1;
328 sequenceIdsIterator = sequenceIds.begin() + pos + 1;
331 int prof1NumSeqs = profile1NumSeqs;
333 for(int i = numSeqsToInsert - 1; i >= 0; i--)
335 seqArray.insert(seqArrayIterator, *(*seqVector)[i].getSequence());
336 names.insert(namesIterator, (*seqVector)[i].getName());
337 titles.insert(titlesIterator, (*seqVector)[i].getTitle());
338 sequenceIds.insert(sequenceIdsIterator, (*seqVector)[i].getIdentifier());
341 if(profNum != 0 && !explicitPasteToProfile2 && pos <= prof1NumSeqs)
347 if(profNum != 0 && pos <= prof1NumSeqs)
349 profile1NumSeqs = prof1NumSeqs;
352 resetAllSeqWeights();
353 setDefaultOutputIndex();
356 void Alignment::debugPrintAllNames()
358 vector<string>::iterator nameIter = names.begin();
359 while(nameIter != names.end())
361 cout << *nameIter << endl;
366 void Alignment::NameIterator::begin(Alignment *a) {
367 //cout << "begin()\n";
370 i = alignment->names.begin();
374 const string Alignment::NameIterator::next() {
375 //cout << "next()\n";
378 if (i == alignment->names.end())
383 bool Alignment::NameIterator::end() {
387 if (i == alignment->names.end())
392 // 26-03-03,nige: test before appending seqVector
393 bool Alignment::testUniqueNames(vector<Sequence>* seqVector, string *offendingSeq)
395 vector<string>::iterator oldName;
396 vector<Sequence>::iterator newName;
399 //iterate over new candidate names
400 for (newName = seqVector->begin(); unique && newName != seqVector->end(); newName++) {
401 //iterate over old stored names
402 for (oldName = names.begin() + 1; unique && oldName != names.end(); oldName++) {
403 if (*oldName == newName->getName()) {
404 offendingSeq->assign(*oldName);
414 * The function addSequencesToVector is used to add sequences to our seqVector
415 * It is used by both addSequences and appendSequences. This is to reduce code
418 void Alignment::addSequencesToVector(vector<Sequence>* seqVector)
420 std::vector<Sequence>::iterator itSeq;
422 for(itSeq = seqVector->begin(); itSeq != seqVector->end(); ++itSeq)
424 seqArray.push_back(*(*itSeq).getSequence());
425 names.push_back((*itSeq).getName());
426 titles.push_back((*itSeq).getTitle());
427 sequenceIds.push_back((*itSeq).getIdentifier());
430 if(!(((int)seqArray.size() == numSeqs + 1) && ((int)names.size() == numSeqs + 1)
431 && ((int)titles.size() == numSeqs + 1) && ((int)sequenceIds.size() == numSeqs + 1)))
433 cerr << "There has been an error adding the sequences to Alignment.\n"
434 << "Must terminate the program. EaddSequencesrror occured in addSequences.\n";
439 void Alignment::clearAlignment()
441 // Erase all the elements from the vector!
452 maxAlignmentLength = 0;
453 lengthLongestSequence = 0;
454 userParameters->setProfileNum(0);
455 userParameters->setProfile1Empty(true);
456 userParameters->setProfile2Empty(true);
459 void Alignment::clearSeqArray()
461 for(int i = 0; i < (int)seqArray.size(); i++)
468 void Alignment::clearSecStruct1()
470 gapPenaltyMask1.clear();
471 secStructMask1.clear();
475 void Alignment::clearSecStruct2()
477 gapPenaltyMask2.clear();
478 secStructMask2.clear();
483 * The function alignScore is used to score the alignment!
484 * This is used by other classes to see what score the alignment has gotten.
486 int Alignment::alignScore(void)
489 int seq1, seq2, res1, res2;
493 int _maxAA = userParameters->getMaxAA();
494 float _gapOpen = userParameters->getGapOpen();
495 int matrix[NUMRES][NUMRES];
498 // calculate an overall score for the alignment by summing the
499 // scores for each pairwise alignment
501 maxRes = subMatrix->getAlnScoreMatrix(matrix);
504 utilityObject->error("Matrix for alignment scoring not found\n");
509 for (seq1 = 1; seq1 <= numSeqs; seq1++)
511 for (seq2 = 1; seq2 < seq1; seq2++)
513 len1 = seqArray[seq1].size() - 1;
514 len2 = seqArray[seq2].size() - 1;
515 for (i = 1; i < len1 && i < len2; i++)
517 res1 = seqArray[seq1][i];
518 res2 = seqArray[seq2][i];
520 if ((res1 >= 0) && (res1 <= _maxAA) && (res2 >= 0) && (res2 <= _maxAA))
522 score += matrix[res1][res2];
526 ngaps = countGaps(seq1, seq2, len1);
528 score = static_cast<int>(score - (100 * _gapOpen * ngaps)); // Mark change 17-5-07
535 utilityObject->info("Alignment Score %d\n", score);
539 int Alignment::countGaps(int seq1, int seq2, int len)
545 Q.resize(len + 2, 0);
546 R.resize(len + 2, 0);
548 int _maxAA = userParameters->getMaxAA();
554 for (i = 1; i < len; i++)
556 if (seqArray[seq1][i] > _maxAA)
565 if (seqArray[seq2][i] > _maxAA)
574 // NOTE I havent a clue what this does!!!!
575 if (((Q[i - 1] <= R[i - 1]) && (q != 0) && (1-r != 0)) ||
576 ((Q[i - 1] >= R[i - 1]) && (1-q != 0) && (r != 0)))
600 catch(const exception &ex)
602 cerr << ex.what() << endl;
603 cerr << "Terminating program. Cannot continue. Function = countGaps\n";
610 void Alignment::resetAlign()
612 /* remove gaps from older alignments (code = gap_pos1) EXCEPT for
613 gaps that were INPUT with the seqs. which have code =
619 int _gapPos1 = userParameters->getGapPos1();
620 int _gapPos2 = userParameters->getGapPos2();
621 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
622 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
625 for(i = 1; i <= numSeqs;++i)
628 for(j = 1; j <= getSeqLength(i); ++j)
630 if(seqArray[i][j] == _gapPos1 && ( _resetAlignNew || _resetAlignAll))
634 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
639 seqArray[i][sl] = seqArray[i][j];
643 // Andreas Wilm (UCD) added 2008-03-07:
644 // Remove excess bit at end of sequence
645 int numExtraElements = seqArray[i].size() - 1 - sl;
646 for(int k = 0; k < numExtraElements; k++)
648 seqArray[i].pop_back();
653 void Alignment::fixGaps()
657 if (userParameters->getStructPenalties1() != NONE)
659 for (j = 0; j < getSeqLength(1); ++j)
661 if (gapPenaltyMask1[j] == userParameters->getGapPos1())
663 gapPenaltyMask1[j] = userParameters->getGapPos2();
668 if (userParameters->getStructPenalties1() == SECST)
670 for (j = 0; j < getSeqLength(1); ++j)
672 if (secStructMask1[j] == userParameters->getGapPos1())
674 secStructMask1[j] = userParameters->getGapPos2();
679 for(i = 1; i <= numSeqs; ++i)
681 for(j = 1; j <= getSeqLength(i); ++j)
683 if(seqArray[i][j] == userParameters->getGapPos1())
685 seqArray[i][j] = userParameters->getGapPos2();
692 float Alignment::countid(int s1, int s2)
698 int shorterSeqLength = (getSeqLength(s1) < getSeqLength(s2)) ? getSeqLength(s1) : getSeqLength(s2);
701 for (i = 1; i <= shorterSeqLength; i++) // NOTE june29
703 c1 = seqArray[s1][i];
704 c2 = seqArray[s2][i];
705 if ((c1 >= 0) && (c1 < userParameters->getMaxAA()))
722 score = 100.0 *(float)count / (float)total;
728 void Alignment::debugPrintSequences()
731 for(int i = 0; i < (int)seqArray.size(); i++)
733 for(int j = 0; j < (int)seqArray[i].size(); j++)
735 if(seqArray[i][j] > 9)
736 cout << " " << seqArray[i][j];
738 cout << " " << seqArray[i][j];
745 * Note the max_aln_length is now called maxAlignmentLength, and it will be stored
746 * and calculated in this class. Mostly it is used for allocating arrays. But not always.
748 void Alignment::calculateMaxLengths()
750 maxAlignmentLength = 0;
751 lengthLongestSequence = 0;
752 if(seqArray.size() > 0)
754 SeqArray::iterator seqArrayIter = seqArray.begin();
756 while(seqArrayIter != seqArray.end())
758 // NOTE I needed to change this to >= for a bug I had!!!!!!!
759 if((int)(*seqArrayIter).size() - 1 >= lengthLongestSequence)
761 lengthLongestSequence = (*seqArrayIter).size();
766 if(lengthLongestSequence > 0)
768 maxAlignmentLength = (lengthLongestSequence * 2) - 2;
769 lengthLongestSequence -= 1; // MADE A CHANGE HERE AS WELL!!
773 lengthLongestSequence = 0;
774 maxAlignmentLength = 0;
780 maxAlignmentLength = 0;
785 vector<string>::iterator nameVecIter = names.begin();
787 while(nameVecIter != names.end())
789 if((int)(*nameVecIter).size() > maxNames)
791 maxNames = (*nameVecIter).size();
807 * This function checks to see if all names are different. It returns true if they
808 * are all different, and false if there are 2 the same.
810 bool Alignment::checkAllNamesDifferent(string *offendingSeq)
812 bool different = true;
813 // NOTE I added the + 1 here because, if we had a sequence with a name as blank
814 // this would be the same as the first one!
815 vector<string>::iterator namesIter1 = names.begin() + 1;
816 vector<string>::iterator namesIter2;
821 while(namesIter1 != names.end())
823 namesIter2 = namesIter1 + 1;
824 while(namesIter2 != names.end())
826 if((*namesIter1).compare(*namesIter2) == 0) // If we have 2 strings the same.
829 /* 23-03-2007,nige: let someone up the stack deal with this - GUI is too deeply entangled.
830 * utilityObject->error("Multiple sequences found with same name '%s' (first %d chars are significant)\n", namesIter1->c_str(), MAXNAMES);
833 offendingSeq->assign((*namesIter1));
835 return different; // Not all different!
842 counter2 = counter1 + 1;
852 void Alignment::addSecStructMask1(vector<char>* secStructMaskToAdd)
854 secStructMask1 = *secStructMaskToAdd;
857 void Alignment::addSecStructMask2(vector<char>* secStructMaskToAdd)
859 secStructMask2 = *secStructMaskToAdd;
862 void Alignment::addGapPenaltyMask1(vector<char>* gapPenaltyMaskToAdd)
864 gapPenaltyMask1 = *gapPenaltyMaskToAdd;
867 void Alignment::addGapPenaltyMask2(vector<char>* gapPenaltyMaskToAdd)
869 gapPenaltyMask2 = *gapPenaltyMaskToAdd;
872 void Alignment::addSecStructName1(string nameToAdd)
874 secStructName1 = nameToAdd;
877 void Alignment::addSecStructName2(string nameToAdd)
879 secStructName2 = nameToAdd;
882 void Alignment::addSeqWeight(vector<int>* _seqWeight)
884 if(seqWeight.size() == _seqWeight->size())
886 int size = seqWeight.size();
888 for(int i = 0; i < size; i++)
890 seqWeight[i] = (*_seqWeight)[i];
895 void Alignment::printSequencesAddedInfo()
897 if(userParameters->getDisplayInfo())
899 int startSeq = userParameters->getProfile2Empty() ? 1:
901 string dnaFlag = userParameters->getDNAFlag() ? "bp" : "aa";
904 for(int i = startSeq; i <= numSeqs; i++)
906 cout << "Sequence " << i << ": "
907 << std::left << setw(maxNames) << names.at(i)
908 << std::right << setw(6) << getSequenceLength(i)
909 << " " << dnaFlag << std::endl;
914 void Alignment::debugPrintOutAlignInfo()
916 for(int i = 1; i <= numSeqs; i++)
918 cout << "seq-no=" << i << ": name="
919 << std::left << setw(maxNames) << names.at(i)
921 << std::right << setw(6) << getSequenceLength(i)
926 int Alignment::getSequenceLength(int index)
928 return seqArray.at(index).size() - 1;
931 int Alignment::getLengthLongestSequence()
933 int _lengthLongestSequence = 0;
935 for(int i = 1; i <= numSeqs; i++)
937 if(getSeqLength(i) > _lengthLongestSequence)
939 _lengthLongestSequence = getSeqLength(i);
942 return _lengthLongestSequence;
946 * This function is used to find the length of the longest sequence in the range.
948 int Alignment::getLengthLongestSequence(int firstSeq, int lastSeq)
950 int _lengthLongestSequence = 0;
952 if(firstSeq >= 1 && lastSeq <= numSeqs)
954 for(int i = firstSeq; i <= lastSeq; i++)
956 if(getSeqLength(i) > _lengthLongestSequence)
958 _lengthLongestSequence = getSeqLength(i);
962 return _lengthLongestSequence; // Will return 0 if cant check seqs
965 int Alignment::getMaxNames()
970 string Alignment::getSecStructName1()
972 return secStructName1;
975 string Alignment::getSecStructName2()
977 return secStructName2;
980 vector<char>* Alignment::getGapPenaltyMask1()
982 return &gapPenaltyMask1;
985 vector<char>* Alignment::getGapPenaltyMask2()
987 return &gapPenaltyMask2;
990 vector<char>* Alignment::getSecStructMask1()
992 return &secStructMask1;
995 vector<char>* Alignment::getSecStructMask2()
997 return &secStructMask2;
1001 int Alignment::getSecStructMask1Element(int index)
1003 if(index > 0 && index < (int)secStructMask1.size())
1005 return secStructMask1[index];
1009 throw VectorOutOfRange(string("secStructMask1"), index, secStructMask1.size() - 1);
1013 int Alignment::getSecStructMask2Element(int index)
1015 if(index > 0 && index < (int)secStructMask2.size())
1017 return secStructMask2[index];
1021 throw VectorOutOfRange(string("secStructMask2"), index, secStructMask2.size() - 1);
1025 int Alignment::getGapPenaltyMask1Element(int index)
1027 if(index > 0 && index < (int)gapPenaltyMask1.size())
1029 return gapPenaltyMask1[index];
1033 throw VectorOutOfRange(string("gapPenaltyMask1"), index, gapPenaltyMask1.size() - 1);
1037 int Alignment::getGapPenaltyMask2Element(int index)
1039 if(index > 0 && index < (int)gapPenaltyMask2.size())
1041 return gapPenaltyMask2[index];
1045 throw VectorOutOfRange(string("gapPenaltyMask2"), index, gapPenaltyMask2.size() - 1);
1049 string Alignment::getName(int index)
1051 if(index > 0 && index < (int)names.size())
1053 return names[index];
1057 throw VectorOutOfRange(string("names"), index, names.size() - 1);
1063 * Mark 25-1-2007: This function was added to allow access to the unique id.
1065 unsigned long Alignment::getUniqueId(int seq)
1067 if(seq > 0 && seq < (int)sequenceIds.size())
1069 return sequenceIds[seq];
1073 throw VectorOutOfRange(string("sequenceIds"), seq, sequenceIds.size() - 1);
1077 string Alignment::getTitle(int index)
1079 if(index > 0 && index < (int)titles.size())
1081 return titles[index];
1085 throw VectorOutOfRange(string("titles"), index, titles.size() - 1);
1089 int Alignment::getOutputIndex(int index)
1091 if(index >= 0 && index < (int)outputIndex.size())
1093 return outputIndex[index];
1097 throw VectorOutOfRange(string("outputIndex"), index, outputIndex.size() - 1);
1101 int Alignment::getSeqWeight(int index) const
1103 if(index >= 0 && index < (int)seqWeight.size())
1105 return seqWeight[index];
1109 throw VectorOutOfRange(string("seqWeight"), index, seqWeight.size() - 1);
1115 * This function is used by Qt. It is used to calculate the histogram values for the
1116 * widget in clustalQt. The function is in here because it needs access to the SubMatrix
1118 * @param matNum The number of the matrix to be used in the comparison.
1119 * @return A pointer to a vector containing the histogram values.
1121 vector<int>* Alignment::QTcalcHistColumnHeights(int firstSeq, int nSeqs,
1122 Array2D<int>* exceptionalRes)
1124 int n, i, s, p, r, r1;
1125 int numColumns = getLengthLongestSequence();
1126 int scoreScale = userParameters->getQTScorePlotScale();//5; // From ClustalX.
1127 int scoreCutOff = userParameters->getQTResExceptionCutOff();
1128 bool includeGaps = false;
1129 //short *mat_xref, *matptr;
1131 float t, q1, q3, ul;
1132 vector<float> seqdist, sorteddist;
1134 vector<int> seqvector;
1136 vector<vector<int> > profile;
1137 int matrix[NUMRES][NUMRES];
1138 int _maxAA = userParameters->getMaxAA();
1139 int _gapPos1 = userParameters->getGapPos1();
1140 histogramColumnHeights.resize(numColumns);
1142 subMatrix->getQTMatrixForHistogram(matrix);
1144 profile.resize(numColumns + 2, vector<int>(_maxAA + 2));
1145 freq.resize(_maxAA + 2);
1147 for(p = 0; p < numColumns; p++)
1149 for(r = 0; r < _maxAA; r++)
1153 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1155 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1156 seqArray[s + 1][p + 1] < _maxAA)
1158 freq[seqArray[s + 1][p + 1]]++;
1161 for(r = 0; r < _maxAA; r++)
1164 for(r1 = 0; r1 < _maxAA; r1++)
1166 profile[p][r] += freq[r1] * matrix[r1][r];
1168 profile[p][r] = static_cast<int>(profile[p][r] /
1169 static_cast<float>(nSeqs)); // Mark change 17-5-07
1173 seqvector.resize(_maxAA + 2);
1174 seqdist.resize(nSeqs + 1);
1175 sorteddist.resize(nSeqs + 1);
1177 for(p = 0; p < numColumns; p++)
1179 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1181 if (p < getSeqLength(s + 1))
1183 for (r = 0; r < _maxAA; r++)
1185 seqvector[r] = matrix[r][seqArray[s + 1][p + 1]];
1190 for (r = 0; r < _maxAA; r++)
1192 seqvector[r] = matrix[r][_gapPos1];
1195 seqdist[s - firstSeq] = 0.0;
1196 for(r = 0; r < _maxAA; r++)
1198 diff = profile[p][r] - seqvector[r];
1200 seqdist[s - firstSeq] += diff * diff;
1202 seqdist[s - firstSeq] = sqrt((double)seqdist[s - firstSeq]);
1205 // calculate mean,median and rms of seq distances
1206 mean = median = 0.0;
1209 for(s = 0; s < nSeqs; s++)
1215 for(s = 0; s < nSeqs; s++)
1217 sorteddist[s] = seqdist[s];
1223 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1225 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1226 seqArray[s + 1][p + 1] < _maxAA)
1228 mean += seqdist[s - firstSeq];
1236 for(s = firstSeq, i = 0; s < firstSeq + nSeqs; s++)
1238 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1239 seqArray[s + 1][p + 1] < _maxAA)
1241 sorteddist[i++] = seqdist[s - firstSeq];
1245 sortScores(&sorteddist, 0, n - 1);
1254 median = (sorteddist[n / 2 - 1] + sorteddist[n / 2]) / 2.0;
1258 median = sorteddist[n / 2];
1263 histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean *
1264 (6 - scoreScale) / 4.0)) * 100.0 * n / nSeqs);
1268 histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean /
1269 (4.0 * (scoreScale - 4)))) * 100.0 * n / nSeqs);
1279 if(t - (int)t == 0.5)
1281 q3 = (sorteddist[(int)t] + sorteddist[(int)t + 1]) / 2.0;
1282 q1 = (sorteddist[n-(int)t] + sorteddist[n - (int)t - 1]) / 2.0;
1284 else if(t - (int)t > 0.5)
1286 q3 = sorteddist[(int)t + 1];
1287 q1 = sorteddist[n - (int)t - 1];
1291 q3 = sorteddist[(int)t];
1292 q1 = sorteddist[n - (int)t];
1300 ul = q3 + (q3 - q1) * ((float)scoreCutOff / 2.0);
1304 if((exceptionalRes->getRowSize() >= nSeqs) &&
1305 exceptionalRes->getColSize() >= numColumns)
1307 for(int s = firstSeq; s < firstSeq + nSeqs; s++)
1309 if(seqdist[s - firstSeq] > ul && p < getSeqLength(s + 1) &&
1310 seqArray[s + 1][p + 1] >= 0 &&
1311 seqArray[s + 1][p + 1] < userParameters->getMaxAA())
1313 (*exceptionalRes)[s - firstSeq][p] = 1;
1317 (*exceptionalRes)[s - firstSeq][p] = 0;
1322 return &histogramColumnHeights;
1325 void Alignment::sortScores(vector<float>* scores, int f, int l)
1332 swap(scores, f, (f + l) / 2);
1334 for(i = f + 1; i <= l; i++)
1336 if((*scores)[i] > (*scores)[f])
1338 swap(scores, ++last, i);
1341 swap(scores, f, last);
1342 sortScores(scores, f, last - 1);
1343 sortScores(scores, last + 1, l);
1346 void Alignment::swap(vector<float>* scores, int s1, int s2)
1350 temp = (*scores)[s1];
1351 (*scores)[s1] = (*scores)[s2];
1352 (*scores)[s2] = temp;
1355 int Alignment::searchForString(bool* found, int seq, int beginRes, string search)
1357 int lengthSeq = getSeqLength(seq);
1358 if(beginRes > lengthSeq)
1366 // First need to convert search into a vector of ints!
1367 vector<int> codedSearch;
1368 int size = search.size();
1369 codedSearch.resize(size);
1371 for(int i = 0; i < size; i++)
1373 code = userParameters->resIndex(userParameters->getAminoAcidCodes(), search[i]);
1374 codedSearch[i] = code;
1379 int searchSize = codedSearch.size();
1380 // Now check for the string of ints!!!!
1381 for(; res <= lengthSeq; res++) //- nige: res starts at 1
1384 if(seqArray[seq][res] == codedSearch[0])
1386 startPos = res; //- nige
1387 for(int i = 0; i < searchSize && (i + res) <= lengthSeq; i++) //- nige: res starts at 1
1389 if(seqArray[seq][res + i] == codedSearch[i])
1393 //nige: hack: encoded gap character: see also AlignmentScroll.cpp
1394 else if (seqArray[seq][res + i] == 31 || seqArray[seq][res + i] == 30)
1401 break; // Not the same
1404 if(numSame == searchSize)
1416 return startPos; // Not found!!!!!!!
1419 void Alignment::removeGapsFromSelectedSeqs(vector<int>* selected)
1422 int size = selected->size();
1423 int lengthOfSelectedSeq = 0;
1424 int gapPos1 = userParameters->getGapPos1();
1425 int gapPos2 = userParameters->getGapPos2();
1428 for(int i = 1; i <= getNumSeqs() && i < size; i++)
1430 if((*selected)[i] == 1)
1432 // remove gaps from this seq!
1433 lengthOfSelectedSeq = getSeqLength(i);
1435 for(int j = 1; j <= lengthOfSelectedSeq; j++)
1437 if(seqArray[i][j] == gapPos1 || seqArray[i][j] == gapPos2)
1442 seqArray[i][s1] = seqArray[i][j];
1444 // Then remove the excess bit at the end of the array
1445 int numExtraElements = lengthOfSelectedSeq - s1;
1447 if((int)seqArray[i].size() > numExtraElements)
1449 for(int k = 0; k < numExtraElements; k++)
1451 seqArray[i].pop_back();
1459 void Alignment::removeGapOnlyColsFromSelectedSeqs(vector<int>* selected)
1462 int NoneSelected = -1;
1464 int sizeSelected = selected->size();
1465 int firstSeqSelected = NoneSelected;
1466 int gapPos1 = userParameters->getGapPos1();
1467 int gapPos2 = userParameters->getGapPos2();
1470 for(int i = 1; i < sizeSelected; i++)
1472 if((*selected)[i] == 1)
1475 if(firstSeqSelected == -1)
1477 firstSeqSelected = i;
1482 if(firstSeqSelected == NoneSelected)
1484 cout << "No Sequences have been selected\n";
1488 for(int i = 1; i <= getSeqLength(firstSeqSelected);)
1491 for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1493 if(getSeqLength(j) >= i)
1495 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1501 if(numGaps == numColumns)
1503 //cout << " removing a gap column\n\n";
1504 for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1506 for(k = i + 1; k <= getSeqLength(j) + 1 && (int)seqArray[j].size() > k; k++)
1508 seqArray[j][k - 1] = seqArray[j][k];
1510 seqArray[j].pop_back(); // Remove the last element!!
1512 if(getSeqLength(firstSeqSelected) <= 0)
1526 void Alignment::removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum)
1532 int gapPos1 = userParameters->getGapPos1();
1533 int gapPos2 = userParameters->getGapPos2();
1536 int numColumns = lSeq - fSeq + 1;
1538 // We must cheack each column to see if it consists of only '-'
1539 for(int i = 1; i <= getSeqLength(fSeq);)
1542 for(int j = fSeq; j <= lSeq; j++)
1544 if(getSeqLength(j) >= i)
1546 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1552 if(numGaps == numColumns)
1554 for(int j = fSeq; j <= lSeq; j++)
1556 for(k = i + 1; k <= getSeqLength(j) + 1; k++)
1558 seqArray[j][k - 1] = seqArray[j][k];
1560 seqArray[j].pop_back(); // Remove the last element!!
1564 int lengthSecStruct = secStructMask1.size();
1565 int lengthGapMask = gapPenaltyMask1.size();
1567 for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1569 secStructMask1[k - 1] = secStructMask1[k];
1571 for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1573 gapPenaltyMask1[k - 1] = gapPenaltyMask1[k];
1579 int lengthSecStruct = secStructMask2.size();
1580 int lengthGapMask = gapPenaltyMask2.size();
1582 for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1584 secStructMask2[k - 1] = secStructMask2[k];
1586 for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1588 gapPenaltyMask2[k - 1] = gapPenaltyMask2[k];
1591 if(getSeqLength(fSeq) <= 0)
1604 vector<Sequence> Alignment::cutSelectedSequencesFromAlignment(vector<int>* selected)
1606 vector<Sequence> cutSequences;
1607 int sizeOfSelected = selected->size();
1608 SeqArray::iterator seqArrayIterator;
1609 vector<string>::iterator namesIterator;
1610 vector<string>::iterator titlesIterator;
1611 vector<int>::iterator seqWeightIterator;
1612 vector<unsigned long>::iterator sequenceIdsIterator;
1614 int newProfile1NumSeqs = profile1NumSeqs;
1615 int profNum = userParameters->getProfileNum();
1616 int prof1NumSeqs = profile1NumSeqs;
1617 int numCutSoFar = 0;
1618 int intialNumSeqs = numSeqs;
1620 for(int i = 1; i < sizeOfSelected && i <= intialNumSeqs; i++)
1622 if((*selected)[i] == 1)
1624 // Cut the sequence from the alignment!
1625 seqArrayIterator = seqArray.begin() + i - numCutSoFar;
1626 namesIterator = names.begin() + i - numCutSoFar;
1627 titlesIterator = titles.begin() + i - numCutSoFar;
1628 seqWeightIterator = seqWeight.begin() + i - numCutSoFar;
1629 sequenceIdsIterator = sequenceIds.begin() + i - numCutSoFar;
1630 Sequence SeqToCut(&seqArray[i - numCutSoFar], *namesIterator, *titlesIterator,
1631 *sequenceIdsIterator);
1635 seqArrayIterator->clear();
1636 seqArray.erase(seqArrayIterator);
1637 names.erase(namesIterator);
1638 titles.erase(titlesIterator);
1639 seqWeight.erase(seqWeightIterator);
1640 sequenceIds.erase(sequenceIdsIterator);
1651 if(profNum > 0 && i <= prof1NumSeqs)
1653 if(newProfile1NumSeqs > 0)
1655 newProfile1NumSeqs--;
1659 newProfile1NumSeqs = 0;
1662 cutSequences.push_back(SeqToCut);
1665 profile1NumSeqs = newProfile1NumSeqs;
1666 setDefaultOutputIndex();
1667 resetAllSeqWeights();
1668 return cutSequences;
1671 void Alignment::setDefaultOutputIndex()
1673 outputIndex.clear();
1674 outputIndex.resize(numSeqs);
1675 for(int iseq = 1; iseq <= numSeqs; iseq++)
1677 outputIndex[iseq - 1] = iseq;
1681 bool Alignment::removeAllOutsideRange(int beginPos, int endPos)
1684 if(beginPos < 0 || endPos > getLengthLongestSequence())
1686 return false; // cannot do it!!!!
1689 // trim the seqArray
1690 ok = keepPortionOfSeqArray(beginPos, endPos);
1693 cerr << "There was a problem removing a portion of the array\n";
1697 // recalculate the maxLengths
1698 calculateMaxLengths();
1700 // Clear the histogram columns
1701 histogramColumnHeights.clear();
1703 // reset the weights
1704 resetAllSeqWeights();
1708 bool Alignment::keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex)
1710 SeqArray sectionToRealign;
1711 vector<int> emptyVec;
1712 sectionToRealign.push_back(emptyVec); // EMPTY sequence
1714 SeqArray::iterator posToAddTo = sectionToRealign.begin();
1715 // erase from all sequences the range specified here!!!!!
1716 if(beginRangeIndex < 0 || endRangeIndex < 0)
1721 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1722 SeqArray::iterator mainEndIt = seqArray.end();
1724 vector<int>::iterator begin, end, beginRange, endRange, beginCopyRange, endCopyRange;
1726 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1728 vector<int> vecToAdd;
1729 begin = mainBeginIt->begin() + 1;
1730 end = mainBeginIt->end();
1731 beginRange = begin + beginRangeIndex;
1732 endRange = begin + endRangeIndex + 1;
1733 beginCopyRange = beginRange;
1734 endCopyRange = endRange;
1736 // We need to copy all of this into another vector.
1737 if(endCopyRange < end && beginCopyRange < end)
1739 vecToAdd.push_back(0);
1740 for(; beginCopyRange != endCopyRange; beginCopyRange++)
1742 vecToAdd.push_back(*beginCopyRange);
1744 sectionToRealign.push_back(vecToAdd);
1751 if(endRange < end && beginRange < end)
1753 mainBeginIt->erase(beginRange, endRange);
1761 seqArray = sectionToRealign;
1765 void Alignment::debugPrintSeqArray(SeqArray* arrayToPrint)
1767 // I need to use iterators for everything here.
1768 SeqArray::iterator mainBeginIt = arrayToPrint->begin();
1769 SeqArray::iterator mainEndIt = arrayToPrint->end();
1770 vector<int>::iterator begin, end;
1771 string aaCodes = userParameters->getAminoAcidCodes();
1773 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1775 if(mainBeginIt->size() > 0)
1777 begin = mainBeginIt->begin() + 1;
1778 end = mainBeginIt->end();
1779 for(; begin != end; begin++)
1781 if(*begin < (int)aaCodes.size())
1783 cout << aaCodes[*begin];
1795 void Alignment::debugPrintProfile1()
1797 cout << "************** PROFILE1 *********************\n";
1798 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1799 SeqArray::iterator mainEndIt = mainBeginIt + profile1NumSeqs;
1800 vector<int>::iterator begin, end;
1801 string aaCodes = userParameters->getAminoAcidCodes();
1803 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1805 cout << "PROFILE1 SEQ: ";
1806 if(mainBeginIt->size() > 0)
1808 begin = mainBeginIt->begin() + 1;
1809 end = mainBeginIt->end();
1810 for(; begin != end; begin++)
1812 if(*begin < (int)aaCodes.size())
1814 cout << aaCodes[*begin];
1826 void Alignment::debugPrintProfile2()
1828 cout << "************** PROFILE2 *********************\n";
1829 SeqArray::iterator mainBeginIt = seqArray.begin() + 1 + profile1NumSeqs;
1830 SeqArray::iterator mainEndIt = seqArray.end();
1831 vector<int>::iterator begin, end;
1832 string aaCodes = userParameters->getAminoAcidCodes();
1834 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1836 cout << "PROFILE2 SEQ: ";
1837 if(mainBeginIt->size() > 0)
1839 begin = mainBeginIt->begin() + 1;
1840 end = mainBeginIt->end();
1841 for(; begin != end; begin++)
1843 if(*begin < (int)aaCodes.size())
1845 cout << aaCodes[*begin];
1857 bool Alignment::updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos)
1859 if(realignedSeqs.size() != seqArray.size())
1863 if(beginPos < 0 || endPos < 0)
1868 // erase from all sequences the range specified here!!!!!
1870 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1871 SeqArray::iterator mainEndIt = seqArray.end();
1873 SeqArray::iterator pasteBeginIt = realignedSeqs.begin() + 1;
1874 SeqArray::iterator pasteEndIt = realignedSeqs.end();
1876 vector<int>::iterator begin, end, beginRange, endRange;
1878 for(; mainBeginIt != mainEndIt && pasteBeginIt != pasteEndIt; mainBeginIt++)
1880 vector<int> vecToAdd;
1881 begin = mainBeginIt->begin() + 1;
1882 end = mainBeginIt->end();
1883 beginRange = begin + beginPos;
1884 endRange = begin + endPos + 1;
1886 if(endRange < end && beginRange < end)
1888 mainBeginIt->erase(beginRange, endRange);
1889 mainBeginIt->insert(beginRange, pasteBeginIt->begin() + 1,
1890 pasteBeginIt->end());
1901 bool Alignment::reloadAlignment()
1903 if(getNumSeqs() <= 0)
1907 if(userParameters->getOutputOrder() == INPUT)
1911 if((int)outputIndex.size() != getNumSeqs())
1913 outputIndex.clear();
1916 vector<int> emptyVec;
1917 string emptyString = "";
1918 SeqArray outputOrderSeqArray;
1919 outputOrderSeqArray.resize(getNumSeqs() + 1);
1920 outputOrderSeqArray[0] = emptyVec;
1921 vector<string> outputOrderNames;
1922 outputOrderNames.resize(getNumSeqs() + 1);
1923 outputOrderNames[0] = emptyString;
1924 vector<string> outputOrderTitles;
1925 outputOrderTitles.resize(getNumSeqs() + 1);
1926 outputOrderTitles[0] = emptyString;
1927 vector<unsigned long> outputOrderSequenceIds;
1928 outputOrderSequenceIds.resize(getNumSeqs() + 1);
1929 outputOrderSequenceIds[0] = 0;
1931 int size = seqArray.size();
1932 if((seqArray.size() != names.size()) || (seqArray.size() != titles.size()) ||
1933 sequenceIds.size() != names.size())
1939 // Now for each seq,
1940 for(int i = 1; i < size; i++)
1942 if(i < (int)outputOrderSeqArray.size() && i - 1 < (int)outputIndex.size() &&
1943 outputIndex[i - 1] < size)
1945 _outIndex = outputIndex[i - 1];
1946 outputOrderSeqArray[i] = seqArray[_outIndex];
1947 outputOrderNames[i] = names[_outIndex];
1948 outputOrderTitles[i] = titles[_outIndex];
1949 outputOrderSequenceIds[i] = sequenceIds[_outIndex];
1957 // Now we have a copy in the correct order.
1958 // Remove all the elements from the old ones and set them to be these arrays
1960 seqArray = outputOrderSeqArray;
1962 names = outputOrderNames;
1964 titles = outputOrderTitles;
1965 sequenceIds.clear();
1966 sequenceIds = outputOrderSequenceIds;
1971 const vector<int>* Alignment::getSequenceFromUniqueId(unsigned long id)
1973 for(int i = 0; i < (int)sequenceIds.size(); i++)
1975 if(sequenceIds[i] == id)
1977 return getSequence(i);
1981 // We have not found it, throw an exception!!!
1982 throw SequenceNotFoundException();
1987 * Mark 26-1-2007: This function was added to allow access to the unique id.
1989 void Alignment::updateSequence(int index, const vector<int>* seq)
1991 if(index >= 1 && index < (int)seqArray.size())
1993 seqArray[index] = *seq;
1999 * Mark 17-2-2007: This function was added to check if a res is a gap or not.
2001 bool Alignment::isGap(int seq, int col) const
2003 int res = seqArray[seq][col];
2004 if(res == gapPos1 || res == gapPos2)
2014 * This function will be used so that we can create an alignment object from a seqArray.
2015 * This will be used for the tree iteration.
2017 void Alignment::addSequences(SeqArray* seqVector)
2020 numSeqs = seqVector->size() - 1;
2021 vector<int> emptyVec;
2023 seqArray.push_back(emptyVec); // EMPTY sequence
2024 names.push_back(string(""));
2025 titles.push_back(string(""));
2026 sequenceIds.push_back(0);
2027 cout << "\nThere are " << numSeqs << " in the alignment obj\n";
2028 for(int i = 1; i <= numSeqs; i++)
2031 seqArray.push_back((*seqVector)[i]);
2032 titles.push_back(string(""));
2033 sequenceIds.push_back(utilityObject->getUniqueSequenceIdentifier());
2034 name << "name" << numSeqs;
2035 names.push_back(name.str());
2038 calculateMaxLengths();
2039 seqWeight.resize(numSeqs + 1, 100);