4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
7 * The Alignment class is used to store the alignment that is being constructed.
8 * It also contains other information such as gap penalty masks etc.
9 * An object of this type will be passed by reference to the FileReader. This FileReader
10 * and the FileParsers will then set it up properly from the information given in the file.
11 * I have decided to put everything into vectors, strings etc. There are no more raw
12 * arrays, which gets rid of the memory allocation problem.
15 * Mark Jan 16th 2007. I have changed the pasteSequencesIntoPosition function to allow
16 * explicit pastes into profile2.
17 * Mark 25-1-2007. I have changed the class so that each of the sequences has a unique
18 * identifier. Several functions were changed to allow this.
20 * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
21 * process the name vector.
23 * 23-03-07,Nigel Brown(EMBL): added testUniqueNames() predicate, which
24 * compares new sequence names with those in the alignment vector BEFORE
28 // NOTE NOTE NOTE Very important! The sequences are indexed from 1 to numSeqs, not from 0.
29 // This is because the code was originally written in Fortran, where arrays begin at
30 // 1. It has become difficult to change this. Ramu has tried before and had problems,
31 // so we decided to leave it this way.
42 #include "../substitutionMatrix/globalmatrix.h"
43 #include "../general/userparams.h"
44 #include "../general/VectorOutOfRange.h"
45 #include "../general/SequenceNotFoundException.h"
48 // FIXME because this object is used for aligned and unaligned
49 // sequences it would be nice to have a isAligned flag here (AW)
// Two-dimensional integer container used for the sequence store: one inner
// vector<int> per sequence row. Both levels are spelled std::vector so the
// typedef does not depend on a using-directive having been seen beforehand.
typedef std::vector<std::vector<int> > SeqArray;
// Bulk sequence insertion / removal interface. Only the declarations are visible
// here; the definitions live outside this chunk.
// Two addSequences overloads exist: one takes Sequence objects, the other a raw SeqArray.
63 void addSequences(vector<Sequence>* seqVector);
64 void addSequences(SeqArray* seqVector);
65 void appendSequences(vector<Sequence>* seqVector);
// Returns a vector<Sequence> by value; presumably the sequences that were cut out,
// with `selected` identifying them -- TODO confirm against the definition.
66 vector<Sequence> cutSelectedSequencesFromAlignment(vector<int>* selected);
// explicitPasteToProfile2 defaults to false. Per the file header (Jan 16th 2007) it was
// added to allow explicit pastes into profile2.
// NOTE(review): the file header says sequences are numbered from 1; confirm how `pos` is interpreted.
67 void pasteSequencesIntoPosition(vector<Sequence>* seqVector, int pos,
68 bool explicitPasteToProfile2 = false);
70 void resizeSeqArray(int size){seqArray.resize(size); numSeqs = size - 1;
71 outputIndex.resize(size - 1); names.resize(size);
72 titles.resize(size);};
// Setters for the output index, secondary-structure masks, sequence weights and
// gap-penalty masks. All take their input through a non-const pointer; the two
// output-index functions report a bool (meaning not visible here -- presumably success).
73 bool addOutputIndex(vector<int>* outputIndexToAdd);
74 bool appendOutputIndex(vector<int>* outputIndexToAppend);
75 void addSecStructMask1(vector<char>* secStructMaskToAdd);
76 void addSecStructMask2(vector<char>* secStructMaskToAdd);
77 void addSeqWeight(vector<int>* _seqWeight);
78 void addGapPenaltyMask1(vector<char>* gapPenaltyMaskToAdd);
79 void addGapPenaltyMask2(vector<char>* gapPenaltyMaskToAdd);
// Matching getters. The mask getters return non-const pointers, whereas
// getOutputIndex() returns a pointer-to-const. None of them is a const member function.
80 vector<char>* getSecStructMask1();
81 vector<char>* getSecStructMask2();
82 const vector<int>* getOutputIndex();
83 vector<char>* getGapPenaltyMask1();
84 vector<char>* getGapPenaltyMask2();
// The name is passed by value (a copy of the string is made at the call).
85 void addSecStructName1(string nameToAdd);
86 void addSecStructName2(string nameToAdd);
// NOTE(review): s1/s2 look like sequence indices and l like a length -- confirm in the definition.
88 int countGaps(int s1, int s2, int l);
// NOTE(review): returns a float, presumably an identity score between sequences s1 and s2 -- confirm.
91 float countid(int s1, int s2);
93 const vector<int>* getSequence(int index){return &seqArray[index];}; // For Pairwise!
94 const vector<int>* getSequence(int index) const {return &seqArray[index];};
// Looks a sequence up by unique identifier instead of by position (original note:
// "For iteration"). The file header records that every sequence was given a unique
// identifier on 25-1-2007.
// NOTE(review): behaviour for an unknown id is not visible here; SequenceNotFoundException.h
// is included by this header, so presumably that is thrown -- confirm in the definition.
95 const vector<int>* getSequenceFromUniqueId(unsigned long id); // For iteration
96 const SeqArray* getSeqArray() const {return &seqArray;}; // For multiple align!
97 SeqArray* getSeqArrayForRealloc(){return &seqArray;};
// Takes the new residues through a pointer-to-const; `index` presumably follows the
// numbering from 1 described in the file header -- confirm.
98 void updateSequence(int index, const vector<int>* seq);
// Name-uniqueness checks. Both take a string* out-parameter named offendingSeq,
// presumably filled with the clashing name -- confirm in the definition.
100 bool checkAllNamesDifferent(string *offendingSeq);
// Per the file header (23-03-07): a predicate that compares new sequence names with
// those already in the alignment vector.
101 bool testUniqueNames(vector<Sequence>* seqVector, string *offendingSeq);
102 void clearAlignment();
103 void clearSecStruct1();
104 void clearSecStruct2();
105 void printSequencesAddedInfo();
// Return the names by value (a std::string copy).
107 string getSecStructName1();
108 string getSecStructName2();
109 int getNumSeqs() const {return numSeqs;};
111 int getMaxAlnLength(){return maxAlignmentLength;};
112 void setMaxAlnLength(int len){maxAlignmentLength = len;};
// Two overloads: one without arguments and one restricted by firstSeq/lastSeq.
// NOTE(review): whether lastSeq is inclusive is not visible here; the file header says
// sequences are numbered from 1 to numSeqs -- confirm the range convention in the definition.
113 int getLengthLongestSequence();
114 int getLengthLongestSequence(int firstSeq, int lastSeq);
115 int getSeqLength(int index) const {return seqArray[index].size() - 1;};
// Single-element accessors; each returns an int for the given index.
// NOTE(review): the mask members are vector<char>, so these presumably widen a char
// to int -- confirm. Range handling is not visible here (VectorOutOfRange.h is
// included by this header, so out-of-range indices may throw -- verify).
116 int getSecStructMask1Element(int index);
117 int getSecStructMask2Element(int index);
118 int getGapPenaltyMask1Element(int index);
119 int getGapPenaltyMask2Element(int index);
// Overload of getOutputIndex(): this one takes an index and returns an int,
// the no-argument one returns a pointer to a vector<int>.
120 int getOutputIndex(int index);
// The only const member function in this group.
121 int getSeqWeight(int index) const;
122 const vector<int>* getSeqWeights() const{return &seqWeight;}
// Return the name / title by value for the given index.
123 string getName(int index);
124 string getTitle(int index);
// Returns a non-const pointer to a vector<int>.
// NOTE(review): presumably this points at the histogramColumnHeights member, and
// exceptionalRes may be an output table -- confirm in the definition.
125 vector<int>* QTcalcHistColumnHeights(int firstSeq, int nSeqs,
126 Array2D<int>* exceptionalRes);
127 // NOTE July 13, for Qt
129 // NOTE the following functions are to be used when we are doing a profile
130 // alignment. It resets the gaps from fixed.
131 void resetProfile1();
132 void resetProfile2();
133 void resetAllSeqWeights();
// `found` is a bool out-parameter alongside the int return value; `search` is passed by value.
// NOTE(review): the meaning of the returned int (presumably a position) is not visible here -- confirm.
135 int searchForString(bool* found, int seq, int beginRes, string search);
136 void removeGapsFromSelectedSeqs(vector<int>* selected);
137 void removeGapOnlyColsFromSelectedSeqs(vector<int>* selected);
138 void removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum);
139 void setDefaultOutputIndex();
140 bool removeAllOutsideRange(int beginPos, int endPos);
// NOTE(review): realignedSeqs is taken by value, so the whole SeqArray is copied on
// every call; a const reference would avoid that but needs the definition changed too.
141 bool updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos);
142 bool reloadAlignment();
144 int getProfile1NumSeqs(){return profile1NumSeqs;};
145 void setProfile1NumSeqs(int value){profile1NumSeqs = value;}
// Const predicate addressed by sequence and column.
// NOTE(review): presumably both indices follow the numbering from 1 described in the file header -- confirm.
146 bool isGap(int seq, int col) const;
147 void calculateMaxLengths();
150 * The following functions are for the iteration output order.
152 unsigned long getUniqueId(int seq);
154 void debugPrintArray(){debugPrintSeqArray(&seqArray);}
// Debug output helpers (declarations only). debugPrintSeqArray() takes the array to
// print as an argument; debugPrintArray() calls it with this object's own seqArray.
// NOTE(review): where the output goes is not visible in this chunk.
155 void debugPrintSeqArray(SeqArray* arrayToPrint);
156 void debugPrintProfile1();
157 void debugPrintProfile2();
158 void debugPrintOutAlignInfo();
159 void debugPrintAllNames();
160 void debugPrintSequences();
// Grants NameIterator access to this class's non-public members. Per the file header
// (16-02-07) it was added so that a caller can process the name vector.
166 friend class NameIterator;
// NOTE(review): the next three lines appear to belong to the NameIterator class itself
// (its class header is not visible in this chunk): a pointer to the Alignment being
// walked, an iterator over a vector<string>, and a begin() that takes the Alignment -- confirm.
171 Alignment *alignment;
172 vector<string>::iterator i;
174 void begin(Alignment *alignment);
// Internal helpers (declarations only).
// NOTE(review): presumably these sit in the private section; the access specifier is
// not visible in this chunk.
181 void addSequencesToVector(vector<Sequence>* seqVector);
182 int getSequenceLength(int index);
// sortScores()/swap() operate on a vector<float> through a pointer; f/l and s1/s2
// look like index bounds and positions respectively -- confirm in the definitions.
183 void sortScores(vector<float>* scores, int f, int l);
184 void swap(vector<float>* scores, int s1, int s2);
185 bool keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex);
187 void clearSeqArray();
// Data members. Other members used by the inline functions of this class
// (seqArray, numSeqs, profile1NumSeqs) are declared outside this chunk.
// Read and written through getMaxAlnLength() / setMaxAlnLength().
190 int maxAlignmentLength;
191 int lengthLongestSequence;
// resizeSeqArray(size) resizes this to size - 1 entries.
193 vector<int> outputIndex;
194 vector<unsigned long> sequenceIds; // Mark change: To help with output order
// Exposed read-only through getSeqWeights().
195 vector<int> seqWeight;
// resizeSeqArray(size) resizes names and titles to `size` entries.
197 vector<string> names;
198 vector<string> titles;
199 vector<char> gapPenaltyMask1;
200 vector<char> gapPenaltyMask2;
201 vector<char> secStructMask1;
202 vector<char> secStructMask2;
203 string secStructName1;
204 string secStructName2;
205 vector<int> histogramColumnHeights; // NOTE July 13, for Qt
207 int gapPos1, gapPos2;