4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
7 * Output routines ported from clustalx C code in 'interface.c'.
11 * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention
12 * to define output columns, while the port uses (firsRes, lastRes). Fixed
13 * problems in all 7 output routines where the conventions were mixed giving
14 * over-long output blocks.
16 * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut
18 * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83
19 * added const NAMESWIDTH which is calculated based on the
20 * length of the longest sequence name and the upper and lower
21 * limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY
27 #include "AlignmentOutput.h"
32 AlignmentOutput::AlignmentOutput()
33 : clusSecStructOffset(9),
34 clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs
36 // NOTE in the old clustal these arrays ended with NULL as a value.
39 strongGroup.resize(9);
40 strongGroup[0] = string("STA");
41 strongGroup[1] = string("NEQK");
42 strongGroup[2] = string("NHQK");
43 strongGroup[3] = string("NDEQ");
44 strongGroup[4] = string("QHRK");
45 strongGroup[5] = string("MILV");
46 strongGroup[6] = string("MILF");
47 strongGroup[7] = string("HY");
48 strongGroup[8] = string("FYW");
51 weakGroup[0] = string("CSA");
52 weakGroup[1] = string("ATV");
53 weakGroup[2] = string("SAG");
54 weakGroup[3] = string("STNK");
55 weakGroup[4] = string("STPA");
56 weakGroup[5] = string("SGND");
57 weakGroup[6] = string("SNDEQK");
58 weakGroup[7] = string("NDEQHK");
59 weakGroup[8] = string("NEQHRK");
60 weakGroup[9] = string("FVLIM");
61 weakGroup[10] = string("HFY");
63 catch(const exception& e)
65 cerr << "An exception has occured in the contructor of AlignmentOutput.\n"
72 * The function createAlignmentOutput is used to call all the individual functions
73 * for the different file types. It is possible to output the alignment in all file
74 * types at the same time.
76 void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq)
79 int firstRes; // starting sequence range - Ramu
80 int lastRes; // ending sequence range
83 if((firstSeq <= 0) || (lastSeq < firstSeq))
85 utilityObject->error("Cannot produce alignment output."
86 " Incorrect call to createAlignmentOutput."
88 " lastSeq = %d\n", firstSeq, lastSeq);
99 length = alignPtr->getLengthLongestSequence();
102 if (userParameters->getRangeFromToSet())
104 firstRes = userParameters->getRangeFrom();
105 lastRes = userParameters->getRangeTo();
106 // Check if the numbers are ok.
107 if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1))
109 cerr << "seqrange numbers are not set properly, using default....\n";
118 if (rangeOK && (lastRes > length))
121 cout << "Seqrange " << lastRes << " is more than the " << length
122 << " setting it to " << length << " \n";
125 if (userParameters->getMenuFlag())
127 cout << "Consensus length = " << lastRes << " \n";
130 outputRegion partToOutput;
131 partToOutput._firstSeq = firstSeq;
132 partToOutput._lastSeq = lastSeq;
133 partToOutput._firstRes = firstRes;
134 partToOutput._lastRes = lastRes;
136 if(userParameters->getOutputClustal())
138 if(clustalOutFile.get() != 0 && clustalOutFile->is_open())
140 clustalOut(alignPtr, partToOutput);
141 if(clustalOutFile->is_open())
143 clustalOutFile->close();
145 utilityObject->info("CLUSTAL-Alignment file created [%s]\n",
146 clustalOutName.c_str());
149 if(userParameters->getOutputNbrf())
151 if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open())
153 nbrfOut(alignPtr, partToOutput);
154 if(nbrfOutFile->is_open())
156 nbrfOutFile->close();
158 utilityObject->info("NBRF/PIR-Alignment file created [%s]\n",
159 nbrfOutName.c_str());
162 if(userParameters->getOutputGCG())
164 if(gcgOutFile.get() != 0 && gcgOutFile->is_open())
166 gcgOut(alignPtr, partToOutput);
167 if(gcgOutFile->is_open())
171 utilityObject->info("GCG-Alignment file created [%s]\n",
175 if(userParameters->getOutputPhylip())
177 if(phylipOutFile.get() != 0 && phylipOutFile->is_open())
179 phylipOut(alignPtr, partToOutput);
180 if(phylipOutFile->is_open())
182 phylipOutFile->close();
184 utilityObject->info("PHYLIP-Alignment file created [%s]\n",
185 phylipOutName.c_str());
189 if(userParameters->getOutputGde())
191 if(gdeOutFile.get() != 0 && gdeOutFile->is_open())
193 gdeOut(alignPtr, partToOutput);
194 if(gdeOutFile->is_open())
198 utilityObject->info("GDE-Alignment file created [%s]\n",
203 if(userParameters->getOutputNexus())
205 if(nexusOutFile.get() != 0 && nexusOutFile->is_open())
207 nexusOut(alignPtr, partToOutput);
208 if(nexusOutFile->is_open())
210 nexusOutFile->close();
212 utilityObject->info("NEXUS-Alignment file created [%s]\n",
213 nexusOutName.c_str());
217 if(userParameters->getOutputFasta())
219 if(fastaOutFile.get() != 0 && fastaOutFile->is_open())
221 fastaOut(alignPtr, partToOutput);
222 if(fastaOutFile->is_open())
224 fastaOutFile->close();
226 utilityObject->info("Fasta-Alignment file created [%s]\n",
227 fastaOutName.c_str());
231 // Output the alignment to the screen if it is required.
232 if (userParameters->getShowAlign() && userParameters->getMenuFlag())
237 catch(const exception& e)
239 cerr << "An exception has occured in the createAlignmentOutput function.\n"
245 bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames)
247 if(!userParameters->getOutputClustal() &&
248 !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
249 !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
250 !userParameters->getOutputNexus() && !userParameters->getOutputFasta())
252 utilityObject->error("You must select an alignment output format\n");
256 if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == ""
257 && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == ""
258 && fileNames.phylipFile == "")
260 utilityObject->error("No names for output files. Cannot output alignment.\n");
264 if(fileNames.clustalFile != "")
266 clustalOutName = fileNames.clustalFile;
267 if(!openExplicitFile(clustalOutFile, clustalOutName))
272 if(fileNames.fastaFile != "")
274 fastaOutName = fileNames.fastaFile;
275 if(!openExplicitFile(fastaOutFile, fastaOutName))
280 if(fileNames.gcgFile != "")
282 gcgOutName = fileNames.gcgFile;
283 if(!openExplicitFile(gcgOutFile, gcgOutName))
288 if(fileNames.gdeFile != "")
290 gdeOutName = fileNames.gdeFile;
291 if(!openExplicitFile(gdeOutFile, gdeOutName))
296 if(fileNames.nexusFile != "")
298 nexusOutName = fileNames.nexusFile;
299 if(!openExplicitFile(nexusOutFile, nexusOutName))
304 if(fileNames.nrbfFile != "")
306 nbrfOutName = fileNames.nrbfFile;
307 if(!openExplicitFile(nbrfOutFile, nbrfOutName))
312 if(fileNames.phylipFile != "")
314 phylipOutName = fileNames.phylipFile;
315 if(!openExplicitFile(phylipOutFile, phylipOutName))
323 * The function openAlignmentOutput opens a file for output. It returns true if it
324 * has been successful and false if it has not been successful
326 bool AlignmentOutput::openAlignmentOutput(string path)
328 if(!userParameters->getOutputClustal() &&
329 !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
330 !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
331 !userParameters->getOutputNexus() && !userParameters->getOutputFasta())
333 utilityObject->error("You must select an alignment output format\n");
336 string _fileNameToOutput = path;
337 if(_fileNameToOutput == "")
339 /*_fileNameToOutput = userParameters->getSeqName();*/
340 /* BUG 166 , file extension, FS, 2009-01-26 */
341 utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput);
344 if(userParameters->getOutputClustal())
346 if (userParameters->getOutfileName() != "")
348 clustalOutName = userParameters->getOutfileName();
349 if(!openExplicitFile(clustalOutFile, clustalOutName))
356 clustalOutName = openOutputFile(clustalOutFile,
357 "\nEnter a name for the CLUSTAL output file ",
358 _fileNameToOutput, "aln");
360 if(clustalOutName == "")
362 return false; // We have not been successful.
367 if(userParameters->getOutputNbrf())
369 if (userParameters->getOutfileName() != "")
371 nbrfOutName = userParameters->getOutfileName();
372 if(!openExplicitFile(nbrfOutFile, nbrfOutName))
379 nbrfOutName = openOutputFile(nbrfOutFile,
380 "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput,
383 if(nbrfOutName == "")
385 return false; // We have not been successful.
390 if(userParameters->getOutputGCG())
392 if (userParameters->getOutfileName() != "")
394 gcgOutName = userParameters->getOutfileName();
395 if(!openExplicitFile(gcgOutFile, gcgOutName))
402 gcgOutName = openOutputFile(gcgOutFile,
403 "\nEnter a name for the GCG output file", _fileNameToOutput, "msf");
407 return false; // We have not been successful.
412 if(userParameters->getOutputPhylip())
414 if (userParameters->getOutfileName() != "")
416 phylipOutName = userParameters->getOutfileName();
417 if(!openExplicitFile(phylipOutFile, phylipOutName))
424 phylipOutName = openOutputFile(phylipOutFile,
425 "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy");
427 if(phylipOutName == "")
429 return false; // We have not been successful.
434 if(userParameters->getOutputGde())
436 if (userParameters->getOutfileName() != "")
438 gdeOutName = userParameters->getOutfileName();
439 if(!openExplicitFile(gdeOutFile, gdeOutName))
446 gdeOutName = openOutputFile(gdeOutFile,
447 "\nEnter a name for the GDE output file ",
448 _fileNameToOutput, "gde");
452 return false; // We have not been successful.
457 if(userParameters->getOutputNexus())
459 if (userParameters->getOutfileName() != "")
461 nexusOutName = userParameters->getOutfileName();
462 if(!openExplicitFile(nexusOutFile, nexusOutName))
469 nexusOutName = openOutputFile(nexusOutFile,
470 "\nEnter a name for the NEXUS output file ",
471 _fileNameToOutput, "nxs");
473 if(nexusOutName == "")
475 return false; // We have not been successful.
480 if(userParameters->getOutputFasta())
482 if (userParameters->getOutfileName() != "")
484 fastaOutName = userParameters->getOutfileName();
485 if(!openExplicitFile(fastaOutFile, fastaOutName))
492 fastaOutName = openOutputFile(fastaOutFile,
493 "\nEnter a name for the Fasta output file ",
494 _fileNameToOutput, "fasta");
496 if(fastaOutName == "")
498 return false; // We have not been successful.
509 * The function openExplicitFile is used to open a file when we have the name already.
510 * It returns true if the file has been opened correctly, false otherwise.
512 bool AlignmentOutput::openExplicitFile(auto_ptr<ofstream>& outFile, string fileName)
516 cerr << "Bad output file [" << fileName << "]\n";
517 utilityObject->error("Bad output file [%s]\n", fileName.c_str());
521 outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc));
522 if(!outFile->is_open())
524 utilityObject->error("Cannot open output file [%s]\n", fileName.c_str());
531 * The function openOutputFile is used to open a file when we dont have the name yet.
532 * This function returns the name if it has been successful. If it has not been
533 * successful it returns a blank string ""
535 string AlignmentOutput::openOutputFile(auto_ptr<ofstream>& outFile, string prompt,
536 string path, string fileExtension)
539 string _fileName; // Will return this name.
541 _fileName = path + fileExtension;
543 if(_fileName.compare(userParameters->getSeqName()) == 0)
545 cout << "Output file name is the same as input file.\n";
546 if (userParameters->getMenuFlag())
548 message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]";
550 utilityObject->getStr(message, temp);
557 else if (userParameters->getMenuFlag())
560 message = prompt + " [" + _fileName + "]";
561 utilityObject->getStr(message, temp);
568 outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc));
569 if(!outFile->is_open())
571 utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str());
578 * fastaOut: output the alignment in FASTA format
580 void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput)
587 int firstRes = partToOutput._firstRes;
588 int lastRes = partToOutput._lastRes;
589 int firstSeq = partToOutput._firstSeq;
590 int lastSeq = partToOutput._lastSeq;
592 cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n";
595 const SeqArray* alignment = alignPtr->getSeqArray(); //NOTE june29
596 vector<char> sequence;
597 sequence.assign(lastRes + 1, '0');
599 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
601 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
602 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
604 lineLength = LINELENGTH;
608 for(ii = firstSeq; ii <= lastSeq; ii++)
610 i = alignPtr->getOutputIndex(ii - 1);
612 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
613 for(j = firstRes; j <= lastRes; j++) //- nige
615 if (j <= alignPtr->getSeqLength(i))
617 //val = alignPtr.getResidue(i, j);
618 val = (*alignment)[i][j]; // NOTE june29
624 if((val == -3) || (val == 253))
628 else if((val < 0) || (val > userParameters->getMaxAA()))
634 residue = userParameters->getAminoAcidCode(val);
636 if (userParameters->getLowercase())
638 sequence[j - firstRes] = residue;
642 sequence[j - firstRes] = residue;
646 (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i));
647 if(userParameters->getSeqRange())
649 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
650 (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end;
653 (*fastaOutFile) << "\n";
654 for(j = 1; j <= slen; j++)
656 (*fastaOutFile) << sequence[j-1];
657 if((j % lineLength == 0) || (j == slen))
659 (*fastaOutFile) << "\n";
663 fastaOutFile->close();
664 cout << "FASTA file created!\n";
666 catch(const bad_alloc& e)
668 fastaOutFile->close();
669 cerr << "A bad_alloc exception has occured in the fastaOut function.\n"
673 catch(VectorOutOfRange e)
675 fastaOutFile->close();
676 cerr << "An exception has occured in the fastaOut function.\n"
682 fastaOutFile->close();
683 cerr << "An exception has occured in the fastaOut function.\n";
690 * gcgOut: output the alignment in gcg file format
692 void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput)
696 int i, ii, chunks, block;
697 int j, k, pos1, pos2;
702 int firstRes = partToOutput._firstRes;
703 int lastRes = partToOutput._lastRes;
704 int firstSeq = partToOutput._firstSeq;
705 int lastSeq = partToOutput._lastSeq;
707 vector<char> sequence;
708 vector<int> allChecks;
709 int _maxLength = alignPtr->getMaxAlnLength();
710 sequence.assign(_maxLength + 1, '0');
711 allChecks.assign(lastSeq + 1, 0);
712 const SeqArray* alignment = alignPtr->getSeqArray();
714 for(i = firstSeq; i <= lastSeq; i++)
716 //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige
717 for(j = firstRes; j <= lastRes; j++) //- nige
719 if (j <= alignPtr->getSeqLength(i))
721 //val = alignPtr.getResidue(i, j);
722 val = (*alignment)[i][j]; // NOTE june29
728 if((val == -3) || (val == 253))
732 else if((val < 0) || (val > userParameters->getMaxAA()))
738 residue = userParameters->getAminoAcidCode(val);
740 sequence[j - firstRes + 1] = residue;
742 // pad any short sequences with gaps, to make all sequences the same length
743 for(; j <= firstRes + lastRes - 1; j++)
745 sequence[j - firstRes + 1] = '.';
747 allChecks[i] = SeqGCGCheckSum(&sequence, lastRes);
751 for(i = 1; i <= alignPtr->getNumSeqs(); i++)
753 _index = alignPtr->getOutputIndex(i - 1);
754 grandChecksum += allChecks[_index];
756 grandChecksum = grandChecksum % 10000;
758 (*gcgOutFile) << "PileUp\n\n";
759 (*gcgOutFile) << "\n\n MSF:" << setw(5) << lastRes << " Type: ";
761 if(userParameters->getDNAFlag())
763 (*gcgOutFile) << "N";
767 (*gcgOutFile) << "P";
770 (*gcgOutFile) << " Check:" << setw(6) << grandChecksum << " .. \n\n";
773 int length = lastRes - firstRes + 1; //- nige
775 for(ii = firstSeq; ii <= lastSeq; ii++)
777 i = alignPtr->getOutputIndex(ii - 1);
778 _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR;
780 (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo Len:"
781 //<< setw(5) << lastRes << " Check:" << setw(6) << allChecks[i] //- nige
782 << setw(5) << length << " Check:" << setw(6) << allChecks[i]
783 << " Weight: " << fixed << setprecision(1) << _seqWeight << "\n";
785 (*gcgOutFile) << "\n//\n";
787 chunks = length / GCG_LINELENGTH; //- nige
788 if(length % GCG_LINELENGTH != 0) //- nige
793 for(block = 1; block <= chunks; block++)
795 (*gcgOutFile) << "\n\n";
796 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
797 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige
799 for(ii = firstSeq; ii <= lastSeq; ii++)
801 i = alignPtr->getOutputIndex(ii - 1);
802 if (!userParameters->getSeqRange())
804 (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left
805 << alignPtr->getName(i) << " ";
809 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
810 std::stringstream ss;
811 std::stringstream ss2;
818 string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart;
819 nameAndRange += "-" + rangeEnd;
820 (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left
823 for(j = pos1, k = 1; j <= pos2; j++, k++)
827 //check for int sequences - pad out with '.' characters to end
829 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
831 //val = alignPtr.getResidue(i, j + firstRes - 1);
832 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
838 if((val == -3) || (val == 253))
842 else if((val < 0) || (val > userParameters->getMaxAA()))
848 residue = userParameters->getAminoAcidCode(val);
850 (*gcgOutFile) << residue;
853 (*gcgOutFile) << " ";
858 (*gcgOutFile) << "\n\n";
861 catch(const bad_alloc& e)
864 cerr << "A bad_alloc exception has occured in the gcgOut function.\n"
868 catch(VectorOutOfRange e)
871 cerr << "An exception has occured in the gcgOut function.\n"
878 cerr << "An exception has occured in the gcgOut function.\n";
883 void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput)
885 int firstRes = partToOutput._firstRes;
886 int lastRes = partToOutput._lastRes;
887 int firstSeq = partToOutput._firstSeq;
888 int lastSeq = partToOutput._lastSeq;
893 int i, ii, chunks, block;
894 int j, k, pos1, pos2;
897 vector<string> _seqNames;
898 const SeqArray* alignment = alignPtr->getSeqArray();
901 // Push on a blank string. This is because the index starts at 1 for the seqs etc..
902 _seqNames.push_back("");
906 for(i = firstSeq; i <= lastSeq; i++)
908 _seqNames.push_back(alignPtr->getName(i));
909 ii = _seqNames.size();
918 for(i = 0; i < (int)_seqNames.size() - 1; i++)
920 for(j = i + 1; j < (int)_seqNames.size(); j++)
922 if (_seqNames[i].compare(_seqNames[j]) == 0)
930 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous.");
934 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.");
938 int length = lastRes - firstRes + 1; //- nige
940 chunks = length / GCG_LINELENGTH; //- nige
941 if(length % GCG_LINELENGTH != 0) //- nige
946 (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " "
947 << setw(6) << length; //-nige
949 for(block = 1; block <= chunks; block++)
951 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
952 //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige
953 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;
954 for(ii = firstSeq; ii <= lastSeq; ii++)
956 i = alignPtr->getOutputIndex(ii - 1);
959 if(!userParameters->getSeqRange())
961 string name = alignPtr->getName(i);
962 (*phylipOutFile) << "\n" << setw(10) << left
963 << name.substr(0,10) << " ";
967 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
968 std::stringstream ss;
969 std::stringstream ss2;
976 string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
977 nameAndRange += rangeStart + "-" + rangeEnd;
978 (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
985 (*phylipOutFile) << "\n ";
987 for(j = pos1, k = 1; j <= pos2; j++, k++)
989 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
991 //val = alignPtr.getResidue(i, j + firstRes - 1);
992 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
998 if((val == -3) || (val == 253))
1002 else if((val < 0) || (val > userParameters->getMaxAA()))
1008 residue = userParameters->getAminoAcidCode(val);
1010 (*phylipOutFile) << residue;
1013 (*phylipOutFile) << " ";
1017 (*phylipOutFile) << "\n";
1020 catch(const bad_alloc& e)
1022 phylipOutFile->close();
1023 cerr << "A bad_alloc exception has occured in the phylipOut function.\n"
1024 << e.what() << "\n";
1027 catch(VectorOutOfRange e)
1029 phylipOutFile->close();
1030 cerr << "An exception has occured in the phylipOut function.\n"
1031 << e.what() << "\n";
1036 phylipOutFile->close();
1037 cerr << "An exception has occured in the phylipOut function.\n";
1042 void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput)
1044 int firstRes = partToOutput._firstRes;
1045 int lastRes = partToOutput._lastRes;
1046 int firstSeq = partToOutput._firstSeq;
1047 int lastSeq = partToOutput._lastSeq;
1052 int i, ii, chunks, block;
1053 int j, k, pos1, pos2;
1054 const SeqArray* alignment = alignPtr->getSeqArray();
1057 int length = lastRes - firstRes + 1; //- nige
1059 chunks = length / GCG_LINELENGTH; //- nige
1060 if(length % GCG_LINELENGTH != 0) //- nige
1065 (*nexusOutFile) << "#NEXUS\n";
1066 (*nexusOutFile) << "BEGIN DATA;\n";
1067 (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs()
1068 << " nchar=" << length << ";\n"; //- nige
1069 (*nexusOutFile) << "format missing=?\n";
1070 (*nexusOutFile) << "symbols=\"";
1072 for(i = 0; i <= userParameters->getMaxAA(); i++)
1074 (*nexusOutFile) << userParameters->getAminoAcidCode(i);
1076 (*nexusOutFile) << "\"\n";
1077 (*nexusOutFile) << "interleave datatype=";
1078 bool _dnaFlag = userParameters->getDNAFlag();
1079 string _type = _dnaFlag ? "DNA " : "PROTEIN ";
1080 (*nexusOutFile) << _type;
1081 (*nexusOutFile) << "gap= -;\n";
1082 (*nexusOutFile) << "\nmatrix";
1084 for(block = 1; block <= chunks; block++)
1086 pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
1087 pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; //- nige
1088 for(ii = firstSeq; ii <= lastSeq; ii++)
1090 i = alignPtr->getOutputIndex(ii - 1);
1091 if (!userParameters->getSeqRange())
1093 (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left
1094 << alignPtr->getName(i) << " ";
1098 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1100 std::stringstream ss;
1101 std::stringstream ss2;
1108 string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
1109 nameAndRange += rangeStart + "-" + rangeEnd;
1110 (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
1114 for(j = pos1, k = 1; j <= pos2; j++, k++)
1116 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1118 //val = alignPtr.getResidue(i, j + firstRes - 1);
1119 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1125 if((val == -3) || (val == 253))
1129 else if((val < 0) || (val > userParameters->getMaxAA()))
1135 residue = userParameters->getAminoAcidCode(val);
1137 (*nexusOutFile) << residue;
1140 (*nexusOutFile) << "\n";
1142 (*nexusOutFile) << ";\nend;\n";
1144 catch(const bad_alloc& e)
1146 nexusOutFile->close();
1147 cerr << "A bad_alloc exception has occured in the nexusOut function.\n"
1148 << e.what() << "\n";
1151 catch(VectorOutOfRange e)
1153 nexusOutFile->close();
1154 cerr << "An exception has occured in the nexusOut function.\n"
1155 << e.what() << "\n";
1160 nexusOutFile->close();
1161 cerr << "An exception has occured in the nexusOut function.\n";
1167 * gdeOut: print out the alignment in gde file format.
1169 void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput)
1171 int firstRes = partToOutput._firstRes;
1172 int lastRes = partToOutput._lastRes;
1173 int firstSeq = partToOutput._firstSeq;
1174 int lastSeq = partToOutput._lastSeq;
1176 int length = lastRes - firstRes + 1; //- nige
1182 vector<char> _ssMask1, _ssMask2, seq;
1186 const SeqArray* alignment = alignPtr->getSeqArray();
1189 seq.assign(alignPtr->getMaxAlnLength() + 1, '0');
1191 // decide the line length for this alignment - maximum is LINELENGTH
1192 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1193 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1194 if (lineLength > LINELENGTH || lineLength <= 0)
1196 lineLength = LINELENGTH;
1199 // If we are using the secondary structure info from profile 1, set it up
1200 if (userParameters->getStructPenalties1() == SECST &&
1201 userParameters->getUseSS1() == true)
1203 int _lengthSeq1 = alignPtr->getSeqLength(1);
1204 vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1205 _ssMask1.assign(_lengthSeq1 + 10, 0);
1207 for (i = 0;i < _lengthSeq1;i++)
1209 _ssMask1[i] = _secStructMask1->at(i);
1211 printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1);
1214 // If we are using the secondary structure info from profile 2, set it up
1215 if (userParameters->getStructPenalties2() == SECST &&
1216 userParameters->getUseSS2() == true)
1218 int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1219 int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1220 _ssMask2.assign(lengthSeqProfile2 + 10, 0);
1221 vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1223 for (i=0;i < lengthSeqProfile2;i++)
1225 _ssMask2[i] = _secStructMask2->at(i);
1227 printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2);
1230 bool _dnaFlag = userParameters->getDNAFlag();
1231 string _prefix = _dnaFlag ? "#" : "%";
1233 for(ii = firstSeq; ii <= lastSeq; ii++)
1235 i = alignPtr->getOutputIndex(ii - 1);
1237 (*gdeOutFile) << _prefix;
1238 if(!userParameters->getSeqRange())
1240 (*gdeOutFile) << alignPtr->getName(i) << "\n";
1244 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1245 (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/"
1246 << rnum.start << "-" << rnum.end << "\n";
1250 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1251 for(j = firstRes; j <= lastRes; j++) //- nige
1253 if (j <= alignPtr->getSeqLength(i))
1255 //val = alignPtr.getResidue(i, j);
1256 val = (*alignment)[i][j]; // NOTE june29
1262 if((val == -3) || (val == 253))
1266 else if((val < 0) || (val > userParameters->getMaxAA()))
1272 residue = userParameters->getAminoAcidCode(val);
1274 if (userParameters->getLowercase())
1276 seq[j - firstRes] = (char)tolower((int)residue);
1280 seq[j - firstRes] = residue;
1284 for(j = 1; j <= slen; j++)
1286 (*gdeOutFile) << seq[j-1];
1287 if((j % lineLength == 0) || (j == slen))
1289 (*gdeOutFile) << "\n";
1294 if (userParameters->getOutputStructPenalties() == 0 ||
1295 userParameters->getOutputStructPenalties() == 2)
1297 if (userParameters->getStructPenalties1() == SECST &&
1298 userParameters->getUseSS1() == true)
1300 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1301 << alignPtr->getSecStructName1() << "\n";
1303 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1304 for(i = firstRes; i <= lastRes; i++) //- nige
1306 val = _ssMask1[i - 1];
1307 if (val == userParameters->getGapPos1() ||
1308 val == userParameters->getGapPos2())
1310 seq[i - firstRes] = '-';
1314 seq[i - firstRes] = val;
1318 for(i = 1; i <= lastRes; i++)
1320 (*gdeOutFile) << seq[i-1];
1321 if((i % lineLength == 0) || (i == lastRes))
1323 (*gdeOutFile) << "\n";
1328 if (userParameters->getStructPenalties2() == SECST &&
1329 userParameters->getUseSS2() == true)
1331 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1332 << alignPtr->getSecStructName2() << "\n";
1334 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1335 for(i = firstRes; i <= lastRes; i++) //- nige
1337 val = _ssMask2[i - 1];
1338 if (val == userParameters->getGapPos1() ||
1339 val == userParameters->getGapPos2())
1341 seq[i - firstRes] = '-';
1345 seq[i - firstRes] = val;
1349 //for(i = 1; i <= lastRes; i++) //- nige
1350 for(i = 1; i <= length; i++) //- nige
1352 (*gdeOutFile) << seq[i - 1];
1353 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1354 if((i % lineLength == 0) || (i == length)) //- nige
1356 (*gdeOutFile) << "\n";
1361 if (userParameters->getOutputStructPenalties() == 1 ||
1362 userParameters->getOutputStructPenalties() == 2)
1364 if (userParameters->getStructPenalties1() != NONE &&
1365 userParameters->getUseSS1() == true)
1367 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1368 << alignPtr->getSecStructName1() << "\n";
1370 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();
1371 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1372 for(i = firstRes; i <= lastRes; i++) //- nige
1374 val = _gapPenaltyMask1->at(i - 1);
1375 if (val == userParameters->getGapPos1() ||
1376 val == userParameters->getGapPos2())
1378 seq[i - firstRes] = '-';
1382 seq[i - firstRes] = val;
1386 //for(i = 1; i <= lastRes; i++) //- nige
1387 for(i = 1; i <= length; i++) //- nige
1389 (*gdeOutFile) << seq[i - 1];
1390 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1391 if((i % lineLength == 0) || (i == length)) //- nige
1393 (*gdeOutFile) << "\n";
1397 if (userParameters->getStructPenalties2() != NONE &&
1398 userParameters->getUseSS2() == true)
1400 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1401 << alignPtr->getSecStructName2() << "\n";
1403 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1404 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1405 for(i = firstRes; i < length; i++) //- nige
1407 val = _gapPenaltyMask2->at(i-1);
1408 if (val == userParameters->getGapPos1() ||
1409 val == userParameters->getGapPos2())
1411 seq[i - firstRes] = '-';
1415 seq[i - firstRes] = val;
1419 //for(i = 1; i <= lastRes; i++) //- nige
1420 for(i = 1; i <= length; i++) //- nige
1422 (*gdeOutFile) << seq[i - 1];
1423 //if((i % lineLength == 0) || (i == lastRes)) //- nige
1424 if((i % lineLength == 0) || (i == length)) //- nige
1426 (*gdeOutFile) << "\n";
1431 gdeOutFile->close();
1433 catch(const bad_alloc& e)
1435 gdeOutFile->close();
1436 cerr << "A bad_alloc exception has occured in the gdeOut function.\n"
1437 << e.what() << "\n";
1440 catch(VectorOutOfRange e)
1442 gdeOutFile->close();
1443 cerr << "An exception has occured in the gdeOut function.\n"
1444 << e.what() << "\n";
1449 gdeOutFile->close();
1450 cerr << "An exception has occured in the gdeOut function.\n";
1455 void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput)
1463 int firstRes = partToOutput._firstRes;
1464 int lastRes = partToOutput._lastRes;
1465 int firstSeq = partToOutput._firstSeq;
1466 int lastSeq = partToOutput._lastSeq;
1470 int _maxLength = alignPtr->getMaxAlnLength();
1471 vector<char> sequence;
1472 sequence.assign(_maxLength + 1, '0');
1473 const SeqArray* alignment = alignPtr->getSeqArray();
1474 // decide the line length for this alignment - maximum is LINELENGTH
1475 lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1476 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1477 if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07
1479 lineLength = LINELENGTH;
1482 // Get name prefix. DL if DNA, P1 if protein
1484 bool _dnaFlag = userParameters->getDNAFlag();
1485 namePrefix = _dnaFlag ? ">DL;" : ">P1;";
1487 //int length = lastRes - firstRes + 1; //- nige
1489 for(ii = firstSeq; ii <= lastSeq; ii++)
1491 i = alignPtr->getOutputIndex(ii - 1);
1493 (*nbrfOutFile) << namePrefix;
1495 if (!userParameters->getSeqRange())
1497 (*nbrfOutFile) << alignPtr->getName(i) << "\n"
1498 << alignPtr->getTitle(i) << "\n";
1502 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1503 (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start
1504 << "-"<< rnum.end << "\n"
1505 << alignPtr->getTitle(i) << "\n";
1508 //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1509 for(j = firstRes; j <= lastRes; j++) //- nige
1511 // NOTE I changed this here!!!!!
1512 if (j <= alignPtr->getSeqLength(i))
1514 //val = alignPtr.getResidue(i, j);
1515 val = (*alignment)[i][j]; // NOTE june29
1521 if((val == -3) || (val == 253))
1525 else if((val < 0) || (val > userParameters->getMaxAA()))
1531 residue = userParameters->getAminoAcidCode(val);
1533 sequence[j - firstRes] = residue;
1536 for(j = 1; j <= slen; j++)
1538 (*nbrfOutFile) << sequence[j - 1];
1539 if((j % lineLength == 0) || (j == slen))
1541 (*nbrfOutFile) << "\n";
1544 (*nbrfOutFile) << "*\n";
1546 nbrfOutFile->close();
1548 catch(const bad_alloc& e)
1550 nbrfOutFile->close();
1551 cerr << "A bad_alloc exception has occured in the nbrfOut function.\n"
1552 << e.what() << "\n";
1555 catch(VectorOutOfRange e)
1557 nbrfOutFile->close();
1558 cerr << "An exception has occured in the nbrfOut function.\n"
1559 << e.what() << "\n";
1564 nbrfOutFile->close();
1565 cerr << "An exception has occured in the nbrfOut function.\n";
1571 * The function clustalOut is used to ouput the Alignment into a clustal format
1574 void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput)
1576 int firstRes = partToOutput._firstRes;
1577 int lastRes = partToOutput._lastRes;
1578 int firstSeq = partToOutput._firstSeq;
1579 int lastSeq = partToOutput._lastSeq;
1585 vector<int> printSeqNo;
1586 vector<char> ss_mask1, ss_mask2;
1587 const SeqArray* alignment = alignPtr->getSeqArray();
1591 int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks;
1598 string sequenceLine = "";
1601 int _numSequences = alignPtr->getNumSeqs();
1602 // PMcG revert to Clustalw1.83 output format for name width
1603 const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY);
1605 seqNo.assign(_numSequences + 1, 0);
1606 printSeqNo.assign(_numSequences + 1, 0);
1608 // check that lastSeq <= _numSequences
1609 if(lastSeq > _numSequences)
1611 lastSeq = _numSequences;
1614 for (i = firstSeq; i <= lastSeq; i++)
1616 printSeqNo[i] = seqNo[i] = 0;
1617 for(j = 1; j < firstRes; j++)
1619 //val = alignPtr.getResidue(i, j);
1620 val = (*alignment)[i][j]; // NOTE june29
1621 if((val >= 0) || (val <= userParameters->getMaxAA()))
1628 seq1.assign(alignPtr->getMaxAlnLength() + 1, 0);
1629 // Check if we have secondary structure in file 1 and if we want to output it.
1630 if (userParameters->getStructPenalties1() == SECST &&
1631 userParameters->getUseSS1() == true)
1633 int lengthSeq1 = alignPtr->getSeqLength(1);
1634 ss_mask1.assign(lengthSeq1 + 10, 0);
1635 vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1637 for (i = 0; i < lengthSeq1; i++)
1639 ss_mask1[i] = _secStructMask1->at(i);
1641 printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1);
1645 // Check if we have secondary structure in file 2 and if we want to output it.
1646 if (userParameters->getStructPenalties2() == SECST &&
1647 userParameters->getUseSS2() == true &&
1648 userParameters->getProfile2Empty() == false)
1650 // AW added test getProfile2Empty was meant to fix bug
1651 // 141, but only prevents segfault, output still faulty.
1652 // Has to be fixed somewhere else
1653 int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1654 int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1655 ss_mask2.assign(lengthSeqProfile2 + 10, 0);
1656 vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1657 for (i = 0; i < lengthSeqProfile2; i++)
1659 ss_mask2[i] = _secStructMask2->at(i);
1661 printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2);
1664 (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel()
1665 << " multiple sequence alignment\n\n";
1667 // decide the line length for this alignment - maximum is LINELENGTH
1669 //PMcG 9-2-2008 make line output same as 1.83
1670 //lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1671 lineLength = PAGEWIDTH - NAMESWIDTH;
1672 lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1673 if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
1675 lineLength = LINELENGTH;
1678 int length = lastRes - firstRes + 1; //- nige
1680 chunks = length / lineLength; //- nige
1681 if(length % lineLength != 0) //- nige
1685 //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n",
1686 // firstRes, lastRes, length, chunks);
1688 // This will loop through each of the blocks.
1689 for(lv1 = 1; lv1 <= chunks; ++lv1)
1691 // pos is begining of chunk, ptr is the end of the chunk to be displayed.
1692 pos = ((lv1 - 1) * lineLength) + 1;
1693 ptr = (length < pos + lineLength - 1) ? length : pos + lineLength - 1; //- nige
1695 (*clustalOutFile) << "\n";
1696 int _outStructPenalty = userParameters->getOutputStructPenalties();
1698 string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07
1700 if((int)secStructName1.size() > MAXNAMESTODISPLAY)
1702 //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY);
1703 secStructName1 = secStructName1.substr(0, NAMESWIDTH);
1705 if (_outStructPenalty == 0 || _outStructPenalty == 2)
1707 if (userParameters->getStructPenalties1() == SECST &&
1708 userParameters->getUseSS1() == true)
1710 for(i = pos; i <= ptr; ++i)
1712 // Check if we can access mask position first
1713 if((int)ss_mask1.size() > i + firstRes - 2)
1715 val = ss_mask1[i + firstRes - 2];
1716 if (val == userParameters->getGapPos1() ||
1717 val == userParameters->getGapPos2())
1719 temp[i - pos] = '-';
1723 temp[i - pos] = val;
1731 temp[i - pos] = EOS;
1733 if(userParameters->getSeqRange())
1735 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1736 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1737 clusSecStructOffset)
1738 << left << secStructName1
1739 << " " << temp << "\n";
1743 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1744 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1745 << left << secStructName1 << " "
1750 if (_outStructPenalty == 1 || _outStructPenalty == 2)
1752 if (userParameters->getStructPenalties1() != NONE &&
1753 userParameters->getUseSS1() == true)
1755 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();
1756 for(i = pos; i <= ptr; ++i)
1758 if((int)_gapPenaltyMask1->size() > i + firstRes - 2)
1760 val = _gapPenaltyMask1->at(i + firstRes - 2);
1761 if (val == userParameters->getGapPos1() ||
1762 val == userParameters->getGapPos2())
1764 temp[i - pos] = '-';
1768 temp[i - pos] = val;
1776 temp[i - pos] = EOS;
1778 //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1779 (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1780 << secStructName1 << " "
1786 string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07
1787 //if((int)secStructName2.size() > MAXNAMESTODISPLAY)
1788 if((int)secStructName2.size() > NAMESWIDTH)
1790 //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY);
1791 secStructName2 = secStructName2.substr(0, NAMESWIDTH);
1794 if (_outStructPenalty == 0 || _outStructPenalty == 2)
1796 if (userParameters->getStructPenalties2() == SECST &&
1797 userParameters->getUseSS2() == true)
1799 for(i = pos; i <= ptr; ++i)
1801 if((int)ss_mask2.size() > i + firstRes - 2)
1803 val = ss_mask2[i + firstRes - 2];
1804 if (val == userParameters->getGapPos1() ||
1805 val == userParameters->getGapPos2())
1807 temp[i - pos] = '-';
1811 temp[i - pos] = val;
1819 temp[i - pos] = EOS;
1822 if (userParameters->getSeqRange())
1824 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1825 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1826 clusSecStructOffset)
1827 << left << secStructName2;
1828 (*clustalOutFile) << " " << temp << "\n";
1832 //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1833 (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1834 << left << secStructName2 << " "
1840 if (_outStructPenalty == 1 || _outStructPenalty == 2)
1842 if (userParameters->getStructPenalties2() != NONE &&
1843 userParameters->getUseSS2() == true)
1845 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1846 for(i = pos; i <= ptr; ++i)
1848 if((int)_gapPenaltyMask2->size() > i + firstRes - 2)
1850 val = _gapPenaltyMask2->at(i + firstRes - 2);
1851 if (val == userParameters->getGapPos1() ||
1852 val == userParameters->getGapPos2())
1854 temp[i - pos] = '-';
1858 temp[i - pos] = val;
1866 temp[i - pos] = EOS;
1868 //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1869 (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1870 << secStructName2 << " "
1875 for(ii = firstSeq; ii <= lastSeq; ++ii)
1877 i = alignPtr->getOutputIndex(ii - 1);
1879 for(j = pos; j <= ptr; ++j)
1881 if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1883 //val = alignPtr.getResidue(i, j + firstRes - 1);
1884 val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1890 if((val == -3) || (val == 253))
1894 else if((val < 0) || (val > userParameters->getMaxAA()))
1900 seq1[j] = userParameters->getAminoAcidCode(val);
1905 for(; j <= ptr; ++j)
1912 for(int index = pos; index < ptr + 1; index++)
1914 sequenceLine += seq1[index];
1917 string seqName = alignPtr->getName(i);
1918 //if((int)seqName.size() > MAXNAMESTODISPLAY)
1919 if((int)seqName.size() > NAMESWIDTH)
1921 //seqName = seqName.substr(0, MAXNAMESTODISPLAY);
1922 seqName = seqName.substr(0, NAMESWIDTH);
1925 if (!userParameters->getSeqRange())
1927 // NOTE I made a change here from + 5, to + 6.
1928 //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left
1929 // << alignPtr->getName(i);
1931 //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left
1933 // PMcG changed this to revert to behaviour of clustalw1.83
1935 //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6)
1936 (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName;
1940 else // Put the sequence range information in!
1942 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1943 string nameOnly = nameonly(seqName);
1944 std::stringstream ss;
1945 std::stringstream ss2;
1952 tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd;
1953 //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset)
1954 (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset)
1958 (*clustalOutFile) << sequenceLine;
1960 if (userParameters->getClSeqNumbers() && printSeqNo[i])
1962 (*clustalOutFile) << " " << seqNo[i];
1965 (*clustalOutFile) << "\n";
1968 // Now print out the conservation information!
1969 for(i = pos; i <= ptr; ++i)
1974 for(j = 1; j <= (int)strongGroup.size(); j++)
1976 catident1[j - 1] = 0;
1978 for(j = 1; j <= (int)weakGroup.size(); j++)
1980 catident2[j - 1] = 0;
1983 for(j = firstSeq; j <= lastSeq; ++j)
1985 if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) &&
1986 (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq)))
1988 if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) &&
1989 ((*alignment)[firstSeq][i + firstRes - 1] <=
1990 userParameters->getMaxAA()))
1992 // Count how many are identical to the first sequence
1993 if((*alignment)[firstSeq][i + firstRes - 1] ==
1994 (*alignment)[j][i + firstRes - 1])
1998 // Count how many are in the same category.
1999 for(k = 1; k <= (int)strongGroup.size(); k++)
2001 for(l = 0; (c = strongGroup[k - 1][l]); l++)
2003 int resCode = (*alignment)[j][i + firstRes - 1];
2004 if(resCode <= userParameters->getMaxAA() + 1)
2006 if (userParameters->getAminoAcidCode(resCode) == c)
2014 for(k = 1; k <= (int)weakGroup.size(); k++)
2016 for(l = 0; (c = weakGroup[k - 1][l]); l++)
2018 int resCode = (*alignment)[j][i + firstRes - 1];
2019 if(resCode <= userParameters->getMaxAA() + 1)
2021 if (userParameters->getAminoAcidCode(resCode) == c)
2033 // Now do the conservation part for each block.
2034 if(ident == lastSeq - firstSeq + 1)
2036 seq1[i] = '*'; // All residues the same!
2038 else if (!userParameters->getDNAFlag())
2040 for(k = 1; k <= (int)strongGroup.size(); k++)
2042 if (catident1[k - 1] == lastSeq - firstSeq + 1)
2044 seq1[i] = ':'; // All residues member of the same category
2050 for(k = 1; k <= (int)weakGroup.size(); k++)
2052 if (catident2[k - 1] == lastSeq - firstSeq + 1)
2054 seq1[i] = '.'; // All residues member of the same category
2063 for(int index = pos; index < ptr + 1; index++)
2065 sequenceLine += seq1[index];
2068 //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++)
2069 for(k = 0; k < NAMESWIDTH + 6; k++)
2071 (*clustalOutFile) << " ";
2073 if(userParameters->getSeqRange())
2076 (*clustalOutFile) << " ";
2079 (*clustalOutFile) << sequenceLine << "\n";
2081 clustalOutFile->close();
2083 catch(const bad_alloc& e)
2085 clustalOutFile->close();
2086 cerr << "A bad_alloc exception has occured in the clustalOut function.\n"
2087 << e.what() << "\n";
2090 catch(VectorOutOfRange e)
2092 clustalOutFile->close();
2093 cerr << "An exception has occured in the clustalOut function.\n"
2094 << e.what() << "\n";
2099 clustalOutFile->close();
2100 cerr << "An exception has occured in the clustalOut function.\n"
2101 << e.what() << "\n";
2106 clustalOutFile->close();
2107 cerr << "An exception has occured in the clustalOut function.\n";
2113 * The function printSecStructMask takes in the 2 mask vectors. It makes changes to
2114 * mask and puts the result in structMask. This is the one that will be printed out.
2115 * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no
2116 * way to do random access setting in vectors otherwise.
2118 void AlignmentOutput::printSecStructMask(int prfLength, vector<char>* mask,
2119 vector<char>* structMask)
2122 // calculate the gap penalty mask from the secondary structures
2127 while (i < prfLength)
2129 if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$')
2131 for (j = 0; j < userParameters->getHelixEndMinus(); j++)
2133 if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a'
2134 && mask->at(i+j) != '$'))
2138 (*structMask)[i+j] = 'a';
2141 while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$')
2145 if (mask->at(i) == '$')
2147 (*structMask)[i] = 'A';
2152 (*structMask)[i] = (*mask)[i];
2155 for (j = 0; j < userParameters->getHelixEndMinus(); j++)
2157 if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a'
2158 || mask->at(i-j-1) == '$'))
2160 (*structMask)[i - j - 1] = 'a';
2164 else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%')
2166 for (j = 0; j < userParameters->getStrandEndMinus(); j++)
2168 if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b'
2169 && mask->at(i+j) != '%'))
2173 (*structMask)[i+j] = 'b';
2176 while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%')
2180 if (mask->at(i) == '%')
2182 (*structMask)[i] = 'B';
2188 (*structMask)[i] = mask->at(i);
2192 for (j = 0; j < userParameters->getStrandEndMinus(); j++)
2194 if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' ||
2195 mask->at(i-j-1) == '%'))
2197 (*structMask)[i-j-1] = 'b';
2207 catch(const exception& e)
2209 // Catch all the exceptions
2210 cerr << "There has been an exception in the function printSecStructMask\n"
2211 << e.what() << "\n";
2217 * The function findRangeValues is used to find the range to be printed out.
2221 void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes,
2222 int len, int firstSeq)
2225 assert(firstRes > 0);
2227 assert(firstSeq > 0);
2235 char tmpName[FILENAMELEN + 15];
2237 int iEnd = 0; // to print sequence start-end with names
2247 const SeqArray* alignment = alignPtr->getSeqArray();
2253 i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere!
2254 string name = alignPtr->getName(i);
2256 if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3))
2261 for(tmpk = 1; tmpk < firstRes; tmpk++)
2263 // do this irrespective of above sscanf
2264 //val = alignPtr.getResidue(i, tmpk);
2265 val = (*alignment)[i][tmpk]; // NOTE june29
2266 if ((val < 0) || (val > userParameters->getMaxAA()))
2273 for(j = firstRes; j < firstRes + len; j++)
2275 if(j > alignPtr->getSeqLength(i))
2277 val = -3; // Cant get it so break out.
2281 //val = alignPtr.getResidue(i, j);
2282 val = (*alignment)[i][j]; // NOTE june29
2284 if((val == -3) || (val == 253))
2288 else if((val < 0) || (val > userParameters->getMaxAA()))
2296 if ( found && (iStart == 0) )
2303 if(userParameters->getSeqRange())
2305 cout << "Name : " << alignPtr->getName(i) << " "
2306 << "\n firstRes = "<< firstRes << " "
2307 << " len = " << len << " "
2308 << "\n iStart = " << iStart << " "
2309 << "\n tmpStart = " << tmpStart << " "
2310 << "\n ngaps = " << ngaps << " "
2311 << "\n pregaps = " << pregaps << " ";
2314 formula = iStart - pregaps;
2318 formula = iStart - pregaps + ( tmpStart == 1 ? 0: tmpStart-1) ;
2321 cout << "\n\nsuggestion iStart - pregaps + tmpStart - ntermgaps = "
2322 << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps
2323 << " formula " << formula << " ";
2327 cerr << "\n no range found .... strange, iStart = " << iStart;
2330 if (pregaps == firstRes - 1) // all gaps - now the conditions........
2332 formula = tmpStart ; // keep the previous start...
2334 formula = (formula <= 0) ? 1: formula;
2335 if (pregaps == 0 && tmpStart == 0)
2339 iEnd = formula + len - ngaps -1;
2340 rnum->start = formula;
2342 cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start
2343 << " - " << rnum->end;
2344 cout << " Done checking.........";
2346 catch(VectorOutOfRange e)
2348 cerr << "An exception has occured in the findRangeValues function.\n"
2349 << e.what() << "\n";
2354 cerr << "An exception has occured in findRangeValues function\n";
2359 string AlignmentOutput::nameonly(string s)
2366 while (i < (int)s.size())
2379 catch(const exception& e)
2381 cerr << "An exception has occured in the function nameonly\n"
2388 int AlignmentOutput::SeqGCGCheckSum(vector<char>* seq, int length)
2392 int seqResIndex = 1;
2393 //int seqLength = seq->size();
2395 for (i = 0, check = 0; i < length; i++, seqResIndex++)
2397 char _toUpperCase = (*seq)[seqResIndex];
2398 check += ((i % 57) + 1) * toupper(_toUpperCase);
2401 return (check % 10000);
2404 void AlignmentOutput::showAlign()
2408 char temp[MAXLINE + 1];
2413 if(userParameters->getOutputClustal())
2415 fileName = clustalOutName;
2417 else if(userParameters->getOutputNbrf())
2419 fileName = nbrfOutName;
2421 else if(userParameters->getOutputGCG())
2423 fileName = gcgOutName;
2425 else if(userParameters->getOutputPhylip())
2427 fileName = phylipOutName;
2429 else if(userParameters->getOutputGde())
2431 fileName = gdeOutName;
2433 else if(userParameters->getOutputNexus())
2435 fileName = nexusOutName;
2437 else if(userParameters->getOutputFasta())
2439 fileName = fastaOutName;
2443 return; // No file output type!
2447 _fileIn.open(fileName.c_str(), ios::in);
2448 _fileIn.seekg(0, std::ios::beg); // start at the beginning
2453 while(_fileIn.getline(temp, MAXLINE + 1, '\n'))
2455 //fputs(temp,stdout);
2456 cout << temp << "\n";
2458 if(numLines >= PAGE_LEN)
2461 utilityObject->getStr(string("Press [RETURN] to continue or X to stop"), answer);
2462 if(toupper(answer[0]) == 'X')
2475 utilityObject->getStr(string("Press [RETURN] to continue"), answer);