4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
7 * Output routines ported from clustalx C code in 'interface.c'.
11 * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention
12 * to define output columns, while the port uses (firsRes, lastRes). Fixed
13 * problems in all 7 output routines where the conventions were mixed giving
14 * over-long output blocks.
16 * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut
18 * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83
19 * added const NAMESWIDTH which is calculated based on the
20 * length of the longest sequence name and the upper and lower
21 * limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY
27 #include "AlignmentOutput.h"
32 AlignmentOutput::AlignmentOutput()
33 : clusSecStructOffset(9),
34 clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs
36 // NOTE in the old clustal these arrays ended with NULL as a value.
39 strongGroup.resize(9);
40 strongGroup[0] = string("STA");
41 strongGroup[1] = string("NEQK");
42 strongGroup[2] = string("NHQK");
43 strongGroup[3] = string("NDEQ");
44 strongGroup[4] = string("QHRK");
45 strongGroup[5] = string("MILV");
46 strongGroup[6] = string("MILF");
47 strongGroup[7] = string("HY");
48 strongGroup[8] = string("FYW");
51 weakGroup[0] = string("CSA");
52 weakGroup[1] = string("ATV");
53 weakGroup[2] = string("SAG");
54 weakGroup[3] = string("STNK");
55 weakGroup[4] = string("STPA");
56 weakGroup[5] = string("SGND");
57 weakGroup[6] = string("SNDEQK");
58 weakGroup[7] = string("NDEQHK");
59 weakGroup[8] = string("NEQHRK");
60 weakGroup[9] = string("FVLIM");
61 weakGroup[10] = string("HFY");
63 catch(const exception& e)
65 cerr << "An exception has occured in the contructor of AlignmentOutput.\n"
72 * The function createAlignmentOutput is used to call all the individual functions
73 * for the different file types. It is possible to output the alignment in all file
74 * types at the same time.
76 void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq)
79 int firstRes; // starting sequence range - Ramu
80 int lastRes; // ending sequence range
83 if((firstSeq <= 0) || (lastSeq < firstSeq))
85 utilityObject->error("Cannot produce alignment output."
86 " Incorrect call to createAlignmentOutput."
88 " lastSeq = %d\n", firstSeq, lastSeq);
99 length = alignPtr->getLengthLongestSequence();
102 if (userParameters->getRangeFromToSet())
104 firstRes = userParameters->getRangeFrom();
105 lastRes = userParameters->getRangeTo();
106 // Check if the numbers are ok.
107 if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1))
109 cerr << "seqrange numbers are not set properly, using default....\n";
118 if (rangeOK && (lastRes > length))
121 cout << "Seqrange " << lastRes << " is more than the " << length
122 << " setting it to " << length << " \n";
125 if (userParameters->getMenuFlag())
127 cout << "Consensus length = " << lastRes << " \n";
130 outputRegion partToOutput;
131 partToOutput._firstSeq = firstSeq;
132 partToOutput._lastSeq = lastSeq;
133 partToOutput._firstRes = firstRes;
134 partToOutput._lastRes = lastRes;
136 if(userParameters->getOutputClustal())
138 if(clustalOutFile.get() != 0 && clustalOutFile->is_open())
140 clustalOut(alignPtr, partToOutput);
141 if(clustalOutFile->is_open())
143 clustalOutFile->close();
145 utilityObject->info("CLUSTAL-Alignment file created [%s]\n",
146 clustalOutName.c_str());
149 if(userParameters->getOutputNbrf())
151 if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open())
153 nbrfOut(alignPtr, partToOutput);
154 if(nbrfOutFile->is_open())
156 nbrfOutFile->close();
158 utilityObject->info("NBRF/PIR-Alignment file created [%s]\n",
159 nbrfOutName.c_str());
162 if(userParameters->getOutputGCG())
164 if(gcgOutFile.get() != 0 && gcgOutFile->is_open())
166 gcgOut(alignPtr, partToOutput);
167 if(gcgOutFile->is_open())
171 utilityObject->info("GCG-Alignment file created [%s]\n",
175 if(userParameters->getOutputPhylip())
177 if(phylipOutFile.get() != 0 && phylipOutFile->is_open())
179 phylipOut(alignPtr, partToOutput);
180 if(phylipOutFile->is_open())
182 phylipOutFile->close();
184 utilityObject->info("PHYLIP-Alignment file created [%s]\n",
185 phylipOutName.c_str());
189 if(userParameters->getOutputGde())
191 if(gdeOutFile.get() != 0 && gdeOutFile->is_open())
193 gdeOut(alignPtr, partToOutput);
194 if(gdeOutFile->is_open())
198 utilityObject->info("GDE-Alignment file created [%s]\n",
203 if(userParameters->getOutputNexus())
205 if(nexusOutFile.get() != 0 && nexusOutFile->is_open())
207 nexusOut(alignPtr, partToOutput);
208 if(nexusOutFile->is_open())
210 nexusOutFile->close();
212 utilityObject->info("NEXUS-Alignment file created [%s]\n",
213 nexusOutName.c_str());
217 if(userParameters->getOutputFasta())
219 if(fastaOutFile.get() != 0 && fastaOutFile->is_open())
221 fastaOut(alignPtr, partToOutput);
222 if(fastaOutFile->is_open())
224 fastaOutFile->close();
226 utilityObject->info("Fasta-Alignment file created [%s]\n",
227 fastaOutName.c_str());
231 // Output the alignment to the screen if it is required.
232 if (userParameters->getShowAlign() && userParameters->getMenuFlag())
237 catch(const exception& e)
239 cerr << "An exception has occured in the createAlignmentOutput function.\n"
245 bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames)
247 if(!userParameters->getOutputClustal() &&
248 !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
249 !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
250 !userParameters->getOutputNexus() && !userParameters->getOutputFasta())
252 utilityObject->error("You must select an alignment output format\n");
256 if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == ""
257 && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == ""
258 && fileNames.phylipFile == "")
260 utilityObject->error("No names for output files. Cannot output alignment.\n");
264 if(fileNames.clustalFile != "")
266 clustalOutName = fileNames.clustalFile;
267 if(!openExplicitFile(clustalOutFile, clustalOutName))
272 if(fileNames.fastaFile != "")
274 fastaOutName = fileNames.fastaFile;
275 if(!openExplicitFile(fastaOutFile, fastaOutName))
280 if(fileNames.gcgFile != "")
282 gcgOutName = fileNames.gcgFile;
283 if(!openExplicitFile(gcgOutFile, gcgOutName))
288 if(fileNames.gdeFile != "")
290 gdeOutName = fileNames.gdeFile;
291 if(!openExplicitFile(gdeOutFile, gdeOutName))
296 if(fileNames.nexusFile != "")
298 nexusOutName = fileNames.nexusFile;
299 if(!openExplicitFile(nexusOutFile, nexusOutName))
304 if(fileNames.nrbfFile != "")
306 nbrfOutName = fileNames.nrbfFile;
307 if(!openExplicitFile(nbrfOutFile, nbrfOutName))
312 if(fileNames.phylipFile != "")
314 phylipOutName = fileNames.phylipFile;
315 if(!openExplicitFile(phylipOutFile, phylipOutName))
323 * The function openAlignmentOutput opens a file for output. It returns true if it
324 * has been successful and false if it has not been successful
326 bool AlignmentOutput::openAlignmentOutput(string path)
328 if(!userParameters->getOutputClustal() &&
329 !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
330 !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
331 !userParameters->getOutputNexus() && !userParameters->getOutputFasta())
333 utilityObject->error("You must select an alignment output format\n");
336 string _fileNameToOutput = path;
337 if(_fileNameToOutput == "")
339 /*_fileNameToOutput = userParameters->getSeqName();*/
340 /* BUG 166 , file extension, FS, 2009-01-26 */
341 utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput);
344 if(userParameters->getOutputClustal())
346 if (userParameters->getOutfileName() != "")
348 clustalOutName = userParameters->getOutfileName();
349 if(!openExplicitFile(clustalOutFile, clustalOutName))
356 clustalOutName = openOutputFile(clustalOutFile,
357 "\nEnter a name for the CLUSTAL output file ",
358 _fileNameToOutput, "aln");
360 if(clustalOutName == "")
362 return false; // We have not been successful.
367 if(userParameters->getOutputNbrf())
369 if (userParameters->getOutfileName() != "")
371 nbrfOutName = userParameters->getOutfileName();
372 if(!openExplicitFile(nbrfOutFile, nbrfOutName))
379 nbrfOutName = openOutputFile(nbrfOutFile,
380 "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput,
383 if(nbrfOutName == "")
385 return false; // We have not been successful.
390 if(userParameters->getOutputGCG())
392 if (userParameters->getOutfileName() != "")
394 gcgOutName = userParameters->getOutfileName();
395 if(!openExplicitFile(gcgOutFile, gcgOutName))
402 gcgOutName = openOutputFile(gcgOutFile,
403 "\nEnter a name for the GCG output file", _fileNameToOutput, "msf");
407 return false; // We have not been successful.
412 if(userParameters->getOutputPhylip())
414 if (userParameters->getOutfileName() != "")
416 phylipOutName = userParameters->getOutfileName();
417 if(!openExplicitFile(phylipOutFile, phylipOutName))
424 phylipOutName = openOutputFile(phylipOutFile,
425 "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy");
427 if(phylipOutName == "")
429 return false; // We have not been successful.
434 if(userParameters->getOutputGde())
436 if (userParameters->getOutfileName() != "")
438 gdeOutName = userParameters->getOutfileName();
439 if(!openExplicitFile(gdeOutFile, gdeOutName))
446 gdeOutName = openOutputFile(gdeOutFile,
447 "\nEnter a name for the GDE output file ",
448 _fileNameToOutput, "gde");
452 return false; // We have not been successful.
457 if(userParameters->getOutputNexus())
459 if (userParameters->getOutfileName() != "")
461 nexusOutName = userParameters->getOutfileName();
462 if(!openExplicitFile(nexusOutFile, nexusOutName))
469 nexusOutName = openOutputFile(nexusOutFile,
470 "\nEnter a name for the NEXUS output file ",
471 _fileNameToOutput, "nxs");
473 if(nexusOutName == "")
475 return false; // We have not been successful.
480 if(userParameters->getOutputFasta())
482 if (userParameters->getOutfileName() != "")
484 fastaOutName = userParameters->getOutfileName();
485 if(!openExplicitFile(fastaOutFile, fastaOutName))
492 fastaOutName = openOutputFile(fastaOutFile,
493 "\nEnter a name for the Fasta output file ",
494 _fileNameToOutput, "fasta");
496 if(fastaOutName == "")
498 return false; // We have not been successful.
509 * The function openExplicitFile is used to open a file when we have the name already.
510 * It returns true if the file has been opened correctly, false otherwise.
512 bool AlignmentOutput::openExplicitFile(auto_ptr<ofstream>& outFile, string fileName)
516 cerr << "Bad output file [" << fileName << "]\n";
517 utilityObject->error("Bad output file [%s]\n", fileName.c_str());
521 outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc));
522 if(!outFile->is_open())
524 utilityObject->error("Cannot open output file [%s]\n", fileName.c_str());
531 * The function openOutputFile is used to open a file when we dont have the name yet.
532 * This function returns the name if it has been successful. If it has not been
533 * successful it returns a blank string ""
535 string AlignmentOutput::openOutputFile(auto_ptr<ofstream>& outFile, string prompt,
536 string path, string fileExtension)
539 string _fileName; // Will return this name.
541 _fileName = path + fileExtension;
543 if(_fileName.compare(userParameters->getSeqName()) == 0)
545 cout << "Output file name is the same as input file.\n";
546 if (userParameters->getMenuFlag())
548 message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]";
550 utilityObject->getStr(message, temp);
557 else if (userParameters->getMenuFlag())
560 message = prompt + " [" + _fileName + "]";
561 utilityObject->getStr(message, temp);
568 outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc));
569 if(!outFile->is_open())
571 utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str());
578 * fastaOut: output the alignment in FASTA format
580 void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput)
587 int firstRes = partToOutput._firstRes;
588 int lastRes = partToOutput._lastRes;
589 int firstSeq = partToOutput._firstSeq;
590 int lastSeq = partToOutput._lastSeq;
592 cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n";
595 const SeqArray* alignment = alignPtr->getSeqArray(); //NOTE june29
596 vector<char> sequence;
597 sequence.assign(lastRes + 1, '0');
599 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
601 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
602 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
604 lineLength = LINELENGTH;
608 for(ii = firstSeq; ii <= lastSeq; ii++)
610 i = alignPtr->getOutputIndex(ii - 1);
612 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
613 for(j = firstRes; j <= lastRes; j++) //- nige
615 if (j <= alignPtr->getSeqLength(i))
617 //val = alignPtr.getResidue(i, j);
618 val = (*alignment)[i][j]; // NOTE june29
624 if((val == -3) || (val == 253))
628 else if((val < 0) || (val > userParameters->getMaxAA()))
634 residue = userParameters->getAminoAcidCode(val);
636 if (userParameters->getLowercase())
638 sequence[j - firstRes] = residue;
642 sequence[j - firstRes] = residue;
646 (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i));
647 if(userParameters->getSeqRange())
649 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
650 (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end;
653 (*fastaOutFile) << "\n";
654 for(j = 1; j <= slen; j++)
656 (*fastaOutFile) << sequence[j-1];
657 if((j % lineLength == 0) || (j == slen))
659 (*fastaOutFile) << "\n";
663 fastaOutFile->close();
664 cout << "FASTA file created!\n";
666 catch(const bad_alloc& e)
668 fastaOutFile->close();
669 cerr << "A bad_alloc exception has occured in the fastaOut function.\n"
673 catch(VectorOutOfRange e)
675 fastaOutFile->close();
676 cerr << "An exception has occured in the fastaOut function.\n"
682 fastaOutFile->close();
683 cerr << "An exception has occured in the fastaOut function.\n";
690 * gcgOut: output the alignment in gcg file format
692 void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput)
696 int i, ii, chunks, block;
697 int j, k, pos1, pos2;
702 int firstRes = partToOutput._firstRes;
703 int lastRes = partToOutput._lastRes;
704 int firstSeq = partToOutput._firstSeq;
705 int lastSeq = partToOutput._lastSeq;
707 vector<char> sequence;
708 vector<int> allChecks;
709 int _maxLength = alignPtr->getMaxAlnLength();
710 sequence.assign(_maxLength + 1, '0');
711 allChecks.assign(lastSeq + 1, 0);
712 const SeqArray* alignment = alignPtr->getSeqArray();
714 for(i = firstSeq; i <= lastSeq; i++)
716 //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige
717 for(j = firstRes; j <= lastRes; j++) //- nige
719 if (j <= alignPtr->getSeqLength(i))
721 //val = alignPtr.getResidue(i, j);
722 val = (*alignment)[i][j]; // NOTE june29
728 if((val == -3) || (val == 253))
732 else if((val < 0) || (val > userParameters->getMaxAA()))
738 residue = userParameters->getAminoAcidCode(val);
740 sequence[j - firstRes + 1] = residue;
742 // pad any short sequences with gaps, to make all sequences the same length
743 for(; j <= firstRes + lastRes - 1; j++)
745 sequence[j - firstRes + 1] = '.';
747 allChecks[i] = SeqGCGCheckSum(&sequence, lastRes);
751 for(i = 1; i <= alignPtr->getNumSeqs(); i++)
753 _index = alignPtr->getOutputIndex(i - 1);
754 grandChecksum += allChecks[_index];
756 grandChecksum = grandChecksum % 10000;
758 (*gcgOutFile) << "PileUp\n\n";
759 (*gcgOutFile) << "\n\n MSF:" << setw(5) << lastRes << " Type: ";
761 if(userParameters->getDNAFlag())
763 (*gcgOutFile) << "N";
767 (*gcgOutFile) << "P";
770 (*gcgOutFile) << " Check:" << setw(6) << grandChecksum << " .. \n\n";
773 int length = lastRes - firstRes + 1; //- nige
775 for(ii = firstSeq; ii <= lastSeq; ii++)
777 i = alignPtr->getOutputIndex(ii - 1);
778 _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR;
780 (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo Len:"
781 //<< setw(5) << lastRes << " Check:" << setw(6) << allChecks[i] //- nige
782 << setw(5) << length << " Check:" << setw(6) << allChecks[i]
783 << " Weight: " << fixed << setprecision(1) << _seqWeight << "\n";
785 (*gcgOutFile) << "\n//\n";
787 chunks = length / GCG_LINELENGTH; //- nige
788 if(length % GCG_LINELENGTH != 0) //- nige
793 for(block = 1; block <= chunks; block++)
795 (*gcgOutFile) << "\n\n";
796 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
797 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige
799 for(ii = firstSeq; ii <= lastSeq; ii++)
801 i = alignPtr->getOutputIndex(ii - 1);
802 if (!userParameters->getSeqRange())
804 (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left
805 << alignPtr->getName(i) << " ";
809 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
810 std::stringstream ss;
811 std::stringstream ss2;
818 string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart;
819 nameAndRange += "-" + rangeEnd;
820 (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left
823 for(j = pos1, k = 1; j <= pos2; j++, k++)
827 //check for int sequences - pad out with '.' characters to end
829 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
831 //val = alignPtr.getResidue(i, j + firstRes - 1);
832 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
838 if((val == -3) || (val == 253))
842 else if((val < 0) || (val > userParameters->getMaxAA()))
848 residue = userParameters->getAminoAcidCode(val);
850 (*gcgOutFile) << residue;
853 (*gcgOutFile) << " ";
858 (*gcgOutFile) << "\n\n";
861 catch(const bad_alloc& e)
864 cerr << "A bad_alloc exception has occured in the gcgOut function.\n"
868 catch(VectorOutOfRange e)
871 cerr << "An exception has occured in the gcgOut function.\n"
878 cerr << "An exception has occured in the gcgOut function.\n";
883 void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput)
885 int firstRes = partToOutput._firstRes;
886 int lastRes = partToOutput._lastRes;
887 int firstSeq = partToOutput._firstSeq;
888 int lastSeq = partToOutput._lastSeq;
893 int i, ii, chunks, block;
894 int j, k, pos1, pos2;
897 vector<string> _seqNames;
898 const SeqArray* alignment = alignPtr->getSeqArray();
901 // Push on a blank string. This is because the index starts at 1 for the seqs etc..
902 _seqNames.push_back("");
906 for(i = firstSeq; i <= lastSeq; i++)
908 _seqNames.push_back(alignPtr->getName(i));
909 ii = _seqNames.size();
918 for(i = 0; i < (int)_seqNames.size() - 1; i++)
920 for(j = i + 1; j < (int)_seqNames.size(); j++)
922 if (_seqNames[i].compare(_seqNames[j]) == 0)
930 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous.");
934 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.");
938 int length = lastRes - firstRes + 1; //- nige
940 chunks = length / GCG_LINELENGTH; //- nige
941 if(length % GCG_LINELENGTH != 0) //- nige
946 (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " "
947 << setw(6) << length; //-nige
949 for(block = 1; block <= chunks; block++)
951 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
952 //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige
953 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;
954 for(ii = firstSeq; ii <= lastSeq; ii++)
956 i = alignPtr->getOutputIndex(ii - 1);
959 if(!userParameters->getSeqRange())
961 string name = alignPtr->getName(i);
962 (*phylipOutFile) << "\n" << setw(10) << left
963 << name.substr(0,10) << " ";
967 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
968 std::stringstream ss;
969 std::stringstream ss2;
976 string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
977 nameAndRange += rangeStart + "-" + rangeEnd;
978 (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
985 (*phylipOutFile) << "\n ";
987 for(j = pos1, k = 1; j <= pos2; j++, k++)
989 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
991 //val = alignPtr.getResidue(i, j + firstRes - 1);
992 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
998 if((val == -3) || (val == 253))
1002 else if((val < 0) || (val > userParameters->getMaxAA()))
1008 residue = userParameters->getAminoAcidCode(val);
1010 (*phylipOutFile) << residue;
1013 (*phylipOutFile) << " ";
1017 (*phylipOutFile) << "\n";
1020 catch(const bad_alloc& e)
1022 phylipOutFile->close();
1023 cerr << "A bad_alloc exception has occured in the phylipOut function.\n"
1024 << e.what() << "\n";
1027 catch(VectorOutOfRange e)
1029 phylipOutFile->close();
1030 cerr << "An exception has occured in the phylipOut function.\n"
1031 << e.what() << "\n";
1036 phylipOutFile->close();
1037 cerr << "An exception has occured in the phylipOut function.\n";
1042 void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput)
1044 int firstRes = partToOutput._firstRes;
1045 int lastRes = partToOutput._lastRes;
1046 int firstSeq = partToOutput._firstSeq;
1047 int lastSeq = partToOutput._lastSeq;
1052 int i, ii, chunks, block;
1053 int j, k, pos1, pos2;
1054 const SeqArray* alignment = alignPtr->getSeqArray();
1057 int length = lastRes - firstRes + 1; //- nige
1059 chunks = length / GCG_LINELENGTH; //- nige
1060 if(length % GCG_LINELENGTH != 0) //- nige
1065 (*nexusOutFile) << "#NEXUS\n";
1066 (*nexusOutFile) << "BEGIN DATA;\n";
1067 (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs()
1068 << " nchar=" << length << ";\n"; //- nige
1069 (*nexusOutFile) << "format missing=?\n";
1071 // removed - bugzilla bug 204
1073 (*nexusOutFile) << "symbols=\"";
1075 for(i = 0; i <= userParameters->getMaxAA(); i++)
1077 (*nexusOutFile) << userParameters->getAminoAcidCode(i);
1079 (*nexusOutFile) << "\"\n";
1082 (*nexusOutFile) << "interleave datatype=";
1083 bool _dnaFlag = userParameters->getDNAFlag();
1084 string _type = _dnaFlag ? "DNA " : "PROTEIN ";
1085 (*nexusOutFile) << _type;
1086 (*nexusOutFile) << "gap= -;\n";
1087 (*nexusOutFile) << "\nmatrix";
1089 for(block = 1; block <= chunks; block++)
1091 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
1092 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; //- nige
1093 for(ii = firstSeq; ii <= lastSeq; ii++)
1095 i = alignPtr->getOutputIndex(ii - 1);
1096 if (!userParameters->getSeqRange())
1098 (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left
1099 << alignPtr->getName(i) << " ";
1103 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1105 std::stringstream ss;
1106 std::stringstream ss2;
1113 string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
1114 nameAndRange += rangeStart + "-" + rangeEnd;
1115 (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
1119 for(j = pos1, k = 1; j <= pos2; j++, k++)
1121 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1123 //val = alignPtr.getResidue(i, j + firstRes - 1);
1124 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1130 if((val == -3) || (val == 253))
1134 else if((val < 0) || (val > userParameters->getMaxAA()))
1140 residue = userParameters->getAminoAcidCode(val);
1142 (*nexusOutFile) << residue;
1145 (*nexusOutFile) << "\n";
1147 (*nexusOutFile) << ";\nend;\n";
1149 catch(const bad_alloc& e)
1151 nexusOutFile->close();
1152 cerr << "A bad_alloc exception has occured in the nexusOut function.\n"
1153 << e.what() << "\n";
1156 catch(VectorOutOfRange e)
1158 nexusOutFile->close();
1159 cerr << "An exception has occured in the nexusOut function.\n"
1160 << e.what() << "\n";
1165 nexusOutFile->close();
1166 cerr << "An exception has occured in the nexusOut function.\n";
1172 * gdeOut: print out the alignment in gde file format.
1174 void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput)
1176 int firstRes = partToOutput._firstRes;
1177 int lastRes = partToOutput._lastRes;
1178 int firstSeq = partToOutput._firstSeq;
1179 int lastSeq = partToOutput._lastSeq;
1181 int length = lastRes - firstRes + 1; //- nige
1187 vector<char> _ssMask1, _ssMask2, seq;
1191 const SeqArray* alignment = alignPtr->getSeqArray();
1194 seq.assign(alignPtr->getMaxAlnLength() + 1, '0');
1196 // decide the line length for this alignment - maximum is LINELENGTH
1197 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1198 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1199 if (lineLength > LINELENGTH || lineLength <= 0)
1201 lineLength = LINELENGTH;
1204 // If we are using the secondary structure info from profile 1, set it up
1205 if (userParameters->getStructPenalties1() == SECST &&
1206 userParameters->getUseSS1() == true)
1208 int _lengthSeq1 = alignPtr->getSeqLength(1);
1209 vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1210 _ssMask1.assign(_lengthSeq1 + 10, 0);
1212 for (i = 0;i < _lengthSeq1;i++)
1214 _ssMask1[i] = _secStructMask1->at(i);
1216 printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1);
1219 // If we are using the secondary structure info from profile 2, set it up
1220 if (userParameters->getStructPenalties2() == SECST &&
1221 userParameters->getUseSS2() == true)
1223 int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1224 int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1225 _ssMask2.assign(lengthSeqProfile2 + 10, 0);
1226 vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1228 for (i=0;i < lengthSeqProfile2;i++)
1230 _ssMask2[i] = _secStructMask2->at(i);
1232 printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2);
1235 bool _dnaFlag = userParameters->getDNAFlag();
1236 string _prefix = _dnaFlag ? "#" : "%";
1238 for(ii = firstSeq; ii <= lastSeq; ii++)
1240 i = alignPtr->getOutputIndex(ii - 1);
1242 (*gdeOutFile) << _prefix;
1243 if(!userParameters->getSeqRange())
1245 (*gdeOutFile) << alignPtr->getName(i) << "\n";
1249 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1250 (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/"
1251 << rnum.start << "-" << rnum.end << "\n";
1255 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1256 for(j = firstRes; j <= lastRes; j++) //- nige
1258 if (j <= alignPtr->getSeqLength(i))
1260 //val = alignPtr.getResidue(i, j);
1261 val = (*alignment)[i][j]; // NOTE june29
1267 if((val == -3) || (val == 253))
1271 else if((val < 0) || (val > userParameters->getMaxAA()))
1277 residue = userParameters->getAminoAcidCode(val);
1279 if (userParameters->getLowercase())
1281 seq[j - firstRes] = (char)tolower((int)residue);
1285 seq[j - firstRes] = residue;
1289 for(j = 1; j <= slen; j++)
1291 (*gdeOutFile) << seq[j-1];
1292 if((j % lineLength == 0) || (j == slen))
1294 (*gdeOutFile) << "\n";
1299 if (userParameters->getOutputStructPenalties() == 0 ||
1300 userParameters->getOutputStructPenalties() == 2)
1302 if (userParameters->getStructPenalties1() == SECST &&
1303 userParameters->getUseSS1() == true)
1305 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1306 << alignPtr->getSecStructName1() << "\n";
1308 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1309 for(i = firstRes; i <= lastRes; i++) //- nige
1311 val = _ssMask1[i - 1];
1312 if (val == userParameters->getGapPos1() ||
1313 val == userParameters->getGapPos2())
1315 seq[i - firstRes] = '-';
1319 seq[i - firstRes] = val;
1323 for(i = 1; i <= lastRes; i++)
1325 (*gdeOutFile) << seq[i-1];
1326 if((i % lineLength == 0) || (i == lastRes))
1328 (*gdeOutFile) << "\n";
1333 if (userParameters->getStructPenalties2() == SECST &&
1334 userParameters->getUseSS2() == true)
1336 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1337 << alignPtr->getSecStructName2() << "\n";
1339 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1340 for(i = firstRes; i <= lastRes; i++) //- nige
1342 val = _ssMask2[i - 1];
1343 if (val == userParameters->getGapPos1() ||
1344 val == userParameters->getGapPos2())
1346 seq[i - firstRes] = '-';
1350 seq[i - firstRes] = val;
1354 //for(i = 1; i <= lastRes; i++) //- nige
1355 for(i = 1; i <= length; i++) //- nige
1357 (*gdeOutFile) << seq[i - 1];
1358 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1359 if((i % lineLength == 0) || (i == length)) //- nige
1361 (*gdeOutFile) << "\n";
1366 if (userParameters->getOutputStructPenalties() == 1 ||
1367 userParameters->getOutputStructPenalties() == 2)
1369 if (userParameters->getStructPenalties1() != NONE &&
1370 userParameters->getUseSS1() == true)
1372 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1373 << alignPtr->getSecStructName1() << "\n";
1375 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();
1376 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1377 for(i = firstRes; i <= lastRes; i++) //- nige
1379 val = _gapPenaltyMask1->at(i - 1);
1380 if (val == userParameters->getGapPos1() ||
1381 val == userParameters->getGapPos2())
1383 seq[i - firstRes] = '-';
1387 seq[i - firstRes] = val;
1391 //for(i = 1; i <= lastRes; i++) //- nige
1392 for(i = 1; i <= length; i++) //- nige
1394 (*gdeOutFile) << seq[i - 1];
1395 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1396 if((i % lineLength == 0) || (i == length)) //- nige
1398 (*gdeOutFile) << "\n";
1402 if (userParameters->getStructPenalties2() != NONE &&
1403 userParameters->getUseSS2() == true)
1405 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1406 << alignPtr->getSecStructName2() << "\n";
1408 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1409 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1410 for(i = firstRes; i < length; i++) //- nige
1412 val = _gapPenaltyMask2->at(i-1);
1413 if (val == userParameters->getGapPos1() ||
1414 val == userParameters->getGapPos2())
1416 seq[i - firstRes] = '-';
1420 seq[i - firstRes] = val;
1424 //for(i = 1; i <= lastRes; i++) //- nige
1425 for(i = 1; i <= length; i++) //- nige
1427 (*gdeOutFile) << seq[i - 1];
1428 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1429 if((i % lineLength == 0) || (i == length)) //- nige
1431 (*gdeOutFile) << "\n";
1436 gdeOutFile->close();
1438 catch(const bad_alloc& e)
1440 gdeOutFile->close();
1441 cerr << "A bad_alloc exception has occured in the gdeOut function.\n"
1442 << e.what() << "\n";
1445 catch(VectorOutOfRange e)
1447 gdeOutFile->close();
1448 cerr << "An exception has occured in the gdeOut function.\n"
1449 << e.what() << "\n";
1454 gdeOutFile->close();
1455 cerr << "An exception has occured in the gdeOut function.\n";
1460 void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput)
1468 int firstRes = partToOutput._firstRes;
1469 int lastRes = partToOutput._lastRes;
1470 int firstSeq = partToOutput._firstSeq;
1471 int lastSeq = partToOutput._lastSeq;
1475 int _maxLength = alignPtr->getMaxAlnLength();
1476 vector<char> sequence;
1477 sequence.assign(_maxLength + 1, '0');
1478 const SeqArray* alignment = alignPtr->getSeqArray();
1479 // decide the line length for this alignment - maximum is LINELENGTH
1480 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1481 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1482 if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07
1484 lineLength = LINELENGTH;
1487 // Get name prefix. DL if DNA, P1 if protein
1489 bool _dnaFlag = userParameters->getDNAFlag();
1490 namePrefix = _dnaFlag ? ">DL;" : ">P1;";
1492 //int length = lastRes - firstRes + 1; //- nige
1494 for(ii = firstSeq; ii <= lastSeq; ii++)
1496 i = alignPtr->getOutputIndex(ii - 1);
1498 (*nbrfOutFile) << namePrefix;
1500 if (!userParameters->getSeqRange())
1502 (*nbrfOutFile) << alignPtr->getName(i) << "\n"
1503 << alignPtr->getTitle(i) << "\n";
1507 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1508 (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start
1509 << "-"<< rnum.end << "\n"
1510 << alignPtr->getTitle(i) << "\n";
1513 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1514 for(j = firstRes; j <= lastRes; j++) //- nige
1516 // NOTE I changed this here!!!!!
1517 if (j <= alignPtr->getSeqLength(i))
1519 //val = alignPtr.getResidue(i, j);
1520 val = (*alignment)[i][j]; // NOTE june29
1526 if((val == -3) || (val == 253))
1530 else if((val < 0) || (val > userParameters->getMaxAA()))
1536 residue = userParameters->getAminoAcidCode(val);
1538 sequence[j - firstRes] = residue;
1541 for(j = 1; j <= slen; j++)
1543 (*nbrfOutFile) << sequence[j - 1];
1544 if((j % lineLength == 0) || (j == slen))
1546 (*nbrfOutFile) << "\n";
1549 (*nbrfOutFile) << "*\n";
1551 nbrfOutFile->close();
1553 catch(const bad_alloc& e)
1555 nbrfOutFile->close();
1556 cerr << "A bad_alloc exception has occured in the nbrfOut function.\n"
1557 << e.what() << "\n";
1560 catch(VectorOutOfRange e)
1562 nbrfOutFile->close();
1563 cerr << "An exception has occured in the nbrfOut function.\n"
1564 << e.what() << "\n";
1569 nbrfOutFile->close();
1570 cerr << "An exception has occured in the nbrfOut function.\n";
1576 * The function clustalOut is used to ouput the Alignment into a clustal format
1579 void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput)
1581 int firstRes = partToOutput._firstRes;
1582 int lastRes = partToOutput._lastRes;
1583 int firstSeq = partToOutput._firstSeq;
1584 int lastSeq = partToOutput._lastSeq;
1590 vector<int> printSeqNo;
1591 vector<char> ss_mask1, ss_mask2;
1592 const SeqArray* alignment = alignPtr->getSeqArray();
1596 int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks;
1603 string sequenceLine = "";
1606 int _numSequences = alignPtr->getNumSeqs();
1607 // PMcG revert to Clustalw1.83 output format for name width
1608 const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY);
1610 seqNo.assign(_numSequences + 1, 0);
1611 printSeqNo.assign(_numSequences + 1, 0);
1613 // check that lastSeq <= _numSequences
1614 if(lastSeq > _numSequences)
1616 lastSeq = _numSequences;
1619 for (i = firstSeq; i <= lastSeq; i++)
1621 printSeqNo[i] = seqNo[i] = 0;
1622 for(j = 1; j < firstRes; j++)
1624 //val = alignPtr.getResidue(i, j);
1625 val = (*alignment)[i][j]; // NOTE june29
1626 if((val >= 0) || (val <= userParameters->getMaxAA()))
1633 seq1.assign(alignPtr->getMaxAlnLength() + 1, 0);
1634 // Check if we have secondary structure in file 1 and if we want to output it.
1635 if (userParameters->getStructPenalties1() == SECST &&
1636 userParameters->getUseSS1() == true)
1638 int lengthSeq1 = alignPtr->getSeqLength(1);
1639 ss_mask1.assign(lengthSeq1 + 10, 0);
1640 vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1642 for (i = 0; i < lengthSeq1; i++)
1644 ss_mask1[i] = _secStructMask1->at(i);
1646 printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1);
1650 // Check if we have secondary structure in file 2 and if we want to output it.
1651 if (userParameters->getStructPenalties2() == SECST &&
1652 userParameters->getUseSS2() == true &&
1653 userParameters->getProfile2Empty() == false)
1655 // AW added test getProfile2Empty was meant to fix bug
1656 // 141, but only prevents segfault, output still faulty.
1657 // Has to be fixed somewhere else
1658 int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1659 int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1660 ss_mask2.assign(lengthSeqProfile2 + 10, 0);
1661 vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1662 for (i = 0; i < lengthSeqProfile2; i++)
1664 ss_mask2[i] = _secStructMask2->at(i);
1666 printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2);
1669 (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel()
1670 << " multiple sequence alignment\n\n";
1672 // decide the line length for this alignment - maximum is LINELENGTH
1674 //PMcG 9-2-2008 make line output same as 1.83
1675 //lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1676 lineLength = PAGEWIDTH - NAMESWIDTH;
1677 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1678 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
1680 lineLength = LINELENGTH;
1683 int length = lastRes - firstRes + 1; //- nige
1685 chunks = length / lineLength; //- nige
1686 if(length % lineLength != 0) //- nige
1690 //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n",
1691 // firstRes, lastRes, length, chunks);
1693 // This will loop through each of the blocks.
1694 for(lv1 = 1; lv1 <= chunks; ++lv1)
1696 // pos is begining of chunk, ptr is the end of the chunk to be displayed.
1697 pos = ((lv1 - 1) * lineLength) + 1;
1698 ptr = (length < pos + lineLength - 1) ? length : pos + lineLength - 1; //- nige
1700 (*clustalOutFile) << "\n";
1701 int _outStructPenalty = userParameters->getOutputStructPenalties();
1703 string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07
1705 if((int)secStructName1.size() > MAXNAMESTODISPLAY)
1707 //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY);
1708 secStructName1 = secStructName1.substr(0, NAMESWIDTH);
1710 if (_outStructPenalty == 0 || _outStructPenalty == 2)
1712 if (userParameters->getStructPenalties1() == SECST &&
1713 userParameters->getUseSS1() == true)
1715 for(i = pos; i <= ptr; ++i)
1717 // Check if we can access mask position first
1718 if((int)ss_mask1.size() > i + firstRes - 2)
1720 val = ss_mask1[i + firstRes - 2];
1721 if (val == userParameters->getGapPos1() ||
1722 val == userParameters->getGapPos2())
1724 temp[i - pos] = '-';
1728 temp[i - pos] = val;
1736 temp[i - pos] = EOS;
1738 if(userParameters->getSeqRange())
1740 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1741 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1742 clusSecStructOffset)
1743 << left << secStructName1
1744 << " " << temp << "\n";
1748 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1749 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1750 << left << secStructName1 << " "
1755 if (_outStructPenalty == 1 || _outStructPenalty == 2)
1757 if (userParameters->getStructPenalties1() != NONE &&
1758 userParameters->getUseSS1() == true)
1760 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();
1761 for(i = pos; i <= ptr; ++i)
1763 if((int)_gapPenaltyMask1->size() > i + firstRes - 2)
1765 val = _gapPenaltyMask1->at(i + firstRes - 2);
1766 if (val == userParameters->getGapPos1() ||
1767 val == userParameters->getGapPos2())
1769 temp[i - pos] = '-';
1773 temp[i - pos] = val;
1781 temp[i - pos] = EOS;
1783 //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1784 (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1785 << secStructName1 << " "
1791 string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07
1792 //if((int)secStructName2.size() > MAXNAMESTODISPLAY)
1793 if((int)secStructName2.size() > NAMESWIDTH)
1795 //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY);
1796 secStructName2 = secStructName2.substr(0, NAMESWIDTH);
1799 if (_outStructPenalty == 0 || _outStructPenalty == 2)
1801 if (userParameters->getStructPenalties2() == SECST &&
1802 userParameters->getUseSS2() == true)
1804 for(i = pos; i <= ptr; ++i)
1806 if((int)ss_mask2.size() > i + firstRes - 2)
1808 val = ss_mask2[i + firstRes - 2];
1809 if (val == userParameters->getGapPos1() ||
1810 val == userParameters->getGapPos2())
1812 temp[i - pos] = '-';
1816 temp[i - pos] = val;
1824 temp[i - pos] = EOS;
1827 if (userParameters->getSeqRange())
1829 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1830 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1831 clusSecStructOffset)
1832 << left << secStructName2;
1833 (*clustalOutFile) << " " << temp << "\n";
1837 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1838 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1839 << left << secStructName2 << " "
1845 if (_outStructPenalty == 1 || _outStructPenalty == 2)
1847 if (userParameters->getStructPenalties2() != NONE &&
1848 userParameters->getUseSS2() == true)
1850 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1851 for(i = pos; i <= ptr; ++i)
1853 if((int)_gapPenaltyMask2->size() > i + firstRes - 2)
1855 val = _gapPenaltyMask2->at(i + firstRes - 2);
1856 if (val == userParameters->getGapPos1() ||
1857 val == userParameters->getGapPos2())
1859 temp[i - pos] = '-';
1863 temp[i - pos] = val;
1871 temp[i - pos] = EOS;
1873 //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1874 (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1875 << secStructName2 << " "
1880 for(ii = firstSeq; ii <= lastSeq; ++ii)
1882 i = alignPtr->getOutputIndex(ii - 1);
1884 for(j = pos; j <= ptr; ++j)
1886 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1888 //val = alignPtr.getResidue(i, j + firstRes - 1);
1889 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1895 if((val == -3) || (val == 253))
1899 else if((val < 0) || (val > userParameters->getMaxAA()))
1905 seq1[j] = userParameters->getAminoAcidCode(val);
1910 for(; j <= ptr; ++j)
1917 for(int index = pos; index < ptr + 1; index++)
1919 sequenceLine += seq1[index];
1922 string seqName = alignPtr->getName(i);
1923 //if((int)seqName.size() > MAXNAMESTODISPLAY)
1924 if((int)seqName.size() > NAMESWIDTH)
1926 //seqName = seqName.substr(0, MAXNAMESTODISPLAY);
1927 seqName = seqName.substr(0, NAMESWIDTH);
1930 if (!userParameters->getSeqRange())
1932 // NOTE I made a change here from + 5, to + 6.
1933 //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left
1934 // << alignPtr->getName(i);
1936 //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left
1938 // PMcG changed this to revert to behaviour of clustalw1.83
1940 //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6)
1941 (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName;
1945 else // Put the sequence range information in!
1947 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1948 string nameOnly = nameonly(seqName);
1949 std::stringstream ss;
1950 std::stringstream ss2;
1957 tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd;
1958 //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset)
1959 (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset)
1963 (*clustalOutFile) << sequenceLine;
1965 if (userParameters->getClSeqNumbers() && printSeqNo[i])
1967 (*clustalOutFile) << " " << seqNo[i];
1970 (*clustalOutFile) << "\n";
1973 // Now print out the conservation information!
1974 for(i = pos; i <= ptr; ++i)
1979 for(j = 1; j <= (int)strongGroup.size(); j++)
1981 catident1[j - 1] = 0;
1983 for(j = 1; j <= (int)weakGroup.size(); j++)
1985 catident2[j - 1] = 0;
1988 for(j = firstSeq; j <= lastSeq; ++j)
1990 if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) &&
1991 (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq)))
1993 if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) &&
1994 ((*alignment)[firstSeq][i + firstRes - 1] <=
1995 userParameters->getMaxAA()))
1997 // Count how many are identical to the first sequence
1998 if((*alignment)[firstSeq][i + firstRes - 1] ==
1999 (*alignment)[j][i + firstRes - 1])
2003 // Count how many are in the same category.
2004 for(k = 1; k <= (int)strongGroup.size(); k++)
2006 for(l = 0; (c = strongGroup[k - 1][l]); l++)
2008 int resCode = (*alignment)[j][i + firstRes - 1];
2009 if(resCode <= userParameters->getMaxAA() + 1)
2011 if (userParameters->getAminoAcidCode(resCode) == c)
2019 for(k = 1; k <= (int)weakGroup.size(); k++)
2021 for(l = 0; (c = weakGroup[k - 1][l]); l++)
2023 int resCode = (*alignment)[j][i + firstRes - 1];
2024 if(resCode <= userParameters->getMaxAA() + 1)
2026 if (userParameters->getAminoAcidCode(resCode) == c)
2038 // Now do the conservation part for each block.
2039 if(ident == lastSeq - firstSeq + 1)
2041 seq1[i] = '*'; // All residues the same!
2043 else if (!userParameters->getDNAFlag())
2045 for(k = 1; k <= (int)strongGroup.size(); k++)
2047 if (catident1[k - 1] == lastSeq - firstSeq + 1)
2049 seq1[i] = ':'; // All residues member of the same category
2055 for(k = 1; k <= (int)weakGroup.size(); k++)
2057 if (catident2[k - 1] == lastSeq - firstSeq + 1)
2059 seq1[i] = '.'; // All residues member of the same category
2068 for(int index = pos; index < ptr + 1; index++)
2070 sequenceLine += seq1[index];
2073 //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++)
2074 for(k = 0; k < NAMESWIDTH + 6; k++)
2076 (*clustalOutFile) << " ";
2078 if(userParameters->getSeqRange())
2081 (*clustalOutFile) << " ";
2084 (*clustalOutFile) << sequenceLine << "\n";
2086 clustalOutFile->close();
2088 catch(const bad_alloc& e)
2090 clustalOutFile->close();
2091 cerr << "A bad_alloc exception has occured in the clustalOut function.\n"
2092 << e.what() << "\n";
2095 catch(VectorOutOfRange e)
2097 clustalOutFile->close();
2098 cerr << "An exception has occured in the clustalOut function.\n"
2099 << e.what() << "\n";
2104 clustalOutFile->close();
2105 cerr << "An exception has occured in the clustalOut function.\n"
2106 << e.what() << "\n";
2111 clustalOutFile->close();
2112 cerr << "An exception has occured in the clustalOut function.\n";
2118 * The function printSecStructMask takes in the 2 mask vectors. It makes changes to
2119 * mask and puts the result in structMask. This is the one that will be printed out.
2120 * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no
2121 * way to do random access setting in vectors otherwise.
2123 void AlignmentOutput::printSecStructMask(int prfLength, vector<char>* mask,
2124 vector<char>* structMask)
2127 // calculate the gap penalty mask from the secondary structures
2132 while (i < prfLength)
2134 if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$')
2136 for (j = 0; j < userParameters->getHelixEndMinus(); j++)
2138 if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a'
2139 && mask->at(i+j) != '$'))
2143 (*structMask)[i+j] = 'a';
2146 while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$')
2150 if (mask->at(i) == '$')
2152 (*structMask)[i] = 'A';
2157 (*structMask)[i] = (*mask)[i];
2160 for (j = 0; j < userParameters->getHelixEndMinus(); j++)
2162 if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a'
2163 || mask->at(i-j-1) == '$'))
2165 (*structMask)[i - j - 1] = 'a';
2169 else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%')
2171 for (j = 0; j < userParameters->getStrandEndMinus(); j++)
2173 if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b'
2174 && mask->at(i+j) != '%'))
2178 (*structMask)[i+j] = 'b';
2181 while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%')
2185 if (mask->at(i) == '%')
2187 (*structMask)[i] = 'B';
2193 (*structMask)[i] = mask->at(i);
2197 for (j = 0; j < userParameters->getStrandEndMinus(); j++)
2199 if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' ||
2200 mask->at(i-j-1) == '%'))
2202 (*structMask)[i-j-1] = 'b';
2212 catch(const exception& e)
2214 // Catch all the exceptions
2215 cerr << "There has been an exception in the function printSecStructMask\n"
2216 << e.what() << "\n";
2222 * The function findRangeValues is used to find the range to be printed out.
2226 void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes,
2227 int len, int firstSeq)
2230 assert(firstRes > 0);
2232 assert(firstSeq > 0);
2240 char tmpName[FILENAMELEN + 15];
2242 int iEnd = 0; // to print sequence start-end with names
2252 const SeqArray* alignment = alignPtr->getSeqArray();
2258 i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere!
2259 string name = alignPtr->getName(i);
2261 if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3))
2266 for(tmpk = 1; tmpk < firstRes; tmpk++)
2268 // do this irrespective of above sscanf
2269 //val = alignPtr.getResidue(i, tmpk);
2270 val = (*alignment)[i][tmpk]; // NOTE june29
2271 if ((val < 0) || (val > userParameters->getMaxAA()))
2278 for(j = firstRes; j < firstRes + len; j++)
2280 if(j > alignPtr->getSeqLength(i))
2282 val = -3; // Cant get it so break out.
2286 //val = alignPtr.getResidue(i, j);
2287 val = (*alignment)[i][j]; // NOTE june29
2289 if((val == -3) || (val == 253))
2293 else if((val < 0) || (val > userParameters->getMaxAA()))
2301 if ( found && (iStart == 0) )
2308 if(userParameters->getSeqRange())
2310 cout << "Name : " << alignPtr->getName(i) << " "
2311 << "\n firstRes = "<< firstRes << " "
2312 << " len = " << len << " "
2313 << "\n iStart = " << iStart << " "
2314 << "\n tmpStart = " << tmpStart << " "
2315 << "\n ngaps = " << ngaps << " "
2316 << "\n pregaps = " << pregaps << " ";
2319 formula = iStart - pregaps;
2323 formula = iStart - pregaps + ( tmpStart == 1 ? 0: tmpStart-1) ;
2326 cout << "\n\nsuggestion iStart - pregaps + tmpStart - ntermgaps = "
2327 << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps
2328 << " formula " << formula << " ";
2332 cerr << "\n no range found .... strange, iStart = " << iStart;
2335 if (pregaps == firstRes - 1) // all gaps - now the conditions........
2337 formula = tmpStart ; // keep the previous start...
2339 formula = (formula <= 0) ? 1: formula;
2340 if (pregaps == 0 && tmpStart == 0)
2344 iEnd = formula + len - ngaps -1;
2345 rnum->start = formula;
2347 cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start
2348 << " - " << rnum->end;
2349 cout << " Done checking.........";
2351 catch(VectorOutOfRange e)
2353 cerr << "An exception has occured in the findRangeValues function.\n"
2354 << e.what() << "\n";
2359 cerr << "An exception has occured in findRangeValues function\n";
2364 string AlignmentOutput::nameonly(string s)
2371 while (i < (int)s.size())
2384 catch(const exception& e)
2386 cerr << "An exception has occured in the function nameonly\n"
2393 int AlignmentOutput::SeqGCGCheckSum(vector<char>* seq, int length)
2397 int seqResIndex = 1;
2398 //int seqLength = seq->size();
2400 for (i = 0, check = 0; i < length; i++, seqResIndex++)
2402 char _toUpperCase = (*seq)[seqResIndex];
2403 check += ((i % 57) + 1) * toupper(_toUpperCase);
2406 return (check % 10000);
2409 void AlignmentOutput::showAlign()
2413 char temp[MAXLINE + 1];
2418 if(userParameters->getOutputClustal())
2420 fileName = clustalOutName;
2422 else if(userParameters->getOutputNbrf())
2424 fileName = nbrfOutName;
2426 else if(userParameters->getOutputGCG())
2428 fileName = gcgOutName;
2430 else if(userParameters->getOutputPhylip())
2432 fileName = phylipOutName;
2434 else if(userParameters->getOutputGde())
2436 fileName = gdeOutName;
2438 else if(userParameters->getOutputNexus())
2440 fileName = nexusOutName;
2442 else if(userParameters->getOutputFasta())
2444 fileName = fastaOutName;
2448 return; // No file output type!
2452 _fileIn.open(fileName.c_str(), ios::in);
2453 _fileIn.seekg(0, std::ios::beg); // start at the beginning
2458 while(_fileIn.getline(temp, MAXLINE + 1, '\n'))
2460 //fputs(temp,stdout);
2461 cout << temp << "\n";
2463 if(numLines >= PAGE_LEN)
2466 utilityObject->getStr(string("Press [RETURN] to continue or X to stop"), answer);
2467 if(toupper(answer[0]) == 'X')
2480 utilityObject->getStr(string("Press [RETURN] to continue"), answer);