4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
// Builds a Sequence from a plain residue string.
// Visible steps: the characters of `seq` are copied into _sequence through
// copyStringIntoVector(), and `identifier` is set to the value returned by
// utilityObject->getUniqueSequenceIdentifier().
// NOTE(review): the handling of `name` and `title` sits in lines that are not
// part of this excerpt - presumably they are stored in members; confirm.
24 Sequence::Sequence(string& seq, string& name, string& title)
26 copyStringIntoVector(&_sequence, &seq);
30 identifier = utilityObject->getUniqueSequenceIdentifier();
// Builds a Sequence from a plain residue string with a caller-supplied id.
// As far as the visible code goes, `seq` is copied into _sequence through
// copyStringIntoVector(), exactly like the three-argument constructor.
// NOTE(review): presumably `id` is stored as the identifier instead of asking
// utilityObject for a new one - that assignment is not visible here; confirm.
33 Sequence::Sequence(std::string& seq, std::string& name, std::string& title, unsigned long id)
35 copyStringIntoVector(&_sequence, &seq);
43 * This is an overloaded constructor that is used to construct a seq object from an
44 * encoded sequence instead of a string.
45 * @param encodedSequence
48 * @param id The unique identifier from the previous sequence!!!
51 Sequence::Sequence(std::vector<int>* encodedSequence, std::string& name, std::string& title,
// Copies the contents of the caller's vector into the member by value
// (vector assignment from the dereferenced pointer).
// NOTE(review): encodedSequence is dereferenced without a null check in the
// visible lines - confirm callers never pass a null pointer.
54 _encodedSequence = *encodedSequence;
// Fills _encodedSequence with one integer code per character of _sequence.
// Reads _sequence and userParameters; appends to _encodedSequence.
// NOTE(review): whether _encodedSequence is emptied first, and the condition
// that separates the gap branch from the residue branch, are in lines that
// are not part of this excerpt.
63 void Sequence::encodeSequence()
65 /* code seq as ints .. use gapPos2 for gap */
66 std::vector<char>::iterator it;
// A 0 is appended before any character is encoded, so the codes for the
// characters start one slot later than the characters themselves
// (presumably to make the encoded sequence 1-based - confirm).
68 _encodedSequence.push_back(0);
70 for(it = _sequence.begin(); it != _sequence.end(); ++it)
// Gap positions are stored as the gap code from userParameters->getGapPos2().
74 _encodedSequence.push_back(userParameters->getGapPos2());
// Other characters are stored as whatever resIndex() returns for them against
// the getAminoAcidCodes() table (presumably the character's position in that
// table - confirm).
78 _encodedSequence.push_back(userParameters->resIndex(
79 userParameters->getAminoAcidCodes(), *it));
// Appends every character of *_stringFrom to *_vectorTo, in order, then checks
// that both containers ended up with the same length and reports a mismatch
// on stderr.
// @param _vectorTo   destination vector (modified)
// @param _stringFrom source string (only read in the visible lines)
// NOTE(review): whether *_vectorTo is emptied before the loop is not visible
// in this excerpt; if it is not, a non-empty destination triggers the size
// error below. What follows the error message is also not visible.
89 void Sequence::copyStringIntoVector(vector<char>* _vectorTo, string* _stringFrom)
// size() is cast to int to match the signed loop index; at(i) is the
// bounds-checked accessor.
93 for(int i = 0; i < (int)_stringFrom->size(); i++)
95 _vectorTo->push_back(_stringFrom->at(i));
98 if(_vectorTo->size() != _stringFrom->size())
100 std::cerr << "Error: In function copyStringIntoVector. Strings different length!\n";
// Dumps the sequence to standard output: prints a header line containing
// _name, then walks _sequence and _encodedSequence in turn.
// NOTE(review): the statement that prints each character of _sequence is in
// lines that are not part of this excerpt.
108 void Sequence::printSequence()
110 std::cout << "This is the sequence and the encoded sequence " << _name << std::endl;
112 std::vector<char>::iterator itChar;
113 for(itChar = _sequence.begin(); itChar != _sequence.end(); ++itChar)
119 std::vector<int>::iterator itInt;
120 for(itInt = _encodedSequence.begin(); itInt != _encodedSequence.end(); ++itInt)
// Each encoded value is written preceded by a single space.
122 cout << " " << *itInt;
// Consistency check between the raw characters (_sequence) and their integer
// encoding (_encodedSequence): writes an error to stderr when the two vectors
// differ in size.
// NOTE(review): encodeSequence() pushes a 0 onto _encodedSequence before it
// encodes any character, so after encoding the encoded vector may be one
// element longer than _sequence - confirm this comparison is the intended
// one. What happens after the message (e.g. abort) is not visible here.
130 void Sequence::checkIntegrity()
132 // The sequences should be the same length.
133 if(_sequence.size() != _encodedSequence.size())
135 std::cerr << "Error: _sequence is not same size as _encodedSequence\n";
142 * @return a pointer to the encoded sequence; this is what the pairwise code uses.
144 std::vector<int>* Sequence::getSequence()
// Hands out the address of the internal vector, not a copy: the pointer is
// non-const, so callers can modify the encoded sequence through it, and it is
// only usable while this Sequence object exists.
146 return &_encodedSequence;
// Accessor that returns a std::string by value, so the caller receives its
// own copy.
// NOTE(review): the body is not part of this excerpt; presumably it returns
// the _name member - confirm.
153 std::string Sequence::getName()
// Accessor that returns a std::string by value, so the caller receives its
// own copy.
// NOTE(review): the body is not part of this excerpt; presumably it returns
// the stored title - confirm.
162 std::string Sequence::getTitle()
// Tests for an empty sequence. The check is made on _sequence (the raw
// characters), not on _encodedSequence.
// NOTE(review): the return statements are in lines that are not part of this
// excerpt; presumably true is returned when the size is 0 - confirm.
171 bool Sequence::isEmpty()
173 if(_sequence.size() == 0)
188 bool Sequence::checkDNAFlag()
189 // check if DNA or Protein
190 // The decision is based on counting all A,C,G,T,U or N.
191 // If >= 85% of all characters (except -) are as above => DNA
193 int c, numResidues, numBases;
195 string dna_codes = "ACGTUN";
197 numResidues = numBases = 0;
199 vector<char>::iterator seqIterator = _sequence.begin();
201 while (seqIterator != _sequence.end())
203 if (*seqIterator != '-')
206 if (*seqIterator == 'N')
212 c = userParameters->resIndex(dna_codes, *seqIterator);
222 if ((numBases == 0) || (numResidues == 0))
226 ratio = (float)numBases / (float)numResidues;