2e658ceb2092a336c4fb747de43a850035fd2ab8
[jabaws.git] / binaries / src / clustalw / src / alignment / AlignmentOutput.cpp
1 /**
2  * Author: Mark Larkin
3  * 
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
5  */
6 /**
7  * Output routines ported from clustalx C code in 'interface.c'.
8  *
9  * Changes:
10  *
11  * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention
12  * to define output columns, while the port uses (firsRes, lastRes). Fixed
13  * problems in all 7 output routines where the conventions were mixed giving
14  * over-long output blocks.
15  *
16  * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut
17  *
18  * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83
19  *                     added const NAMESWIDTH which is calculated based on the
20  *                     length of the longest sequence name and the upper and lower
21  *                     limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY
22  */
23
24 #ifdef HAVE_CONFIG_H
25     #include "config.h"
26 #endif
27 #include "AlignmentOutput.h"
28
29 namespace clustalw
30 {
31
32 AlignmentOutput::AlignmentOutput()
33 : clusSecStructOffset(9),
34   clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs
35 {
36     // NOTE in the old clustal these arrays ended with NULL as a value.
37     try
38     {
39         strongGroup.resize(9);
40         strongGroup[0] = string("STA");
41         strongGroup[1] = string("NEQK");
42         strongGroup[2] = string("NHQK");
43         strongGroup[3] = string("NDEQ");
44         strongGroup[4] = string("QHRK");
45         strongGroup[5] = string("MILV");
46         strongGroup[6] = string("MILF");
47         strongGroup[7] = string("HY");
48         strongGroup[8] = string("FYW");
49     
50         weakGroup.resize(11);
51         weakGroup[0] = string("CSA");
52         weakGroup[1] = string("ATV");
53         weakGroup[2] = string("SAG");
54         weakGroup[3] = string("STNK");
55         weakGroup[4] = string("STPA");
56         weakGroup[5] = string("SGND");
57         weakGroup[6] = string("SNDEQK");
58         weakGroup[7] = string("NDEQHK");
59         weakGroup[8] = string("NEQHRK");
60         weakGroup[9] = string("FVLIM");
61         weakGroup[10] = string("HFY");
62     }
63     catch(const exception& e)
64     {
65         cerr << "An exception has occured in the contructor of AlignmentOutput.\n"
66              << e.what() << "\n";
67         exit(1);    
68     }
69 }
70
71 /*
72  * The function createAlignmentOutput is used to call all the individual functions
73  * for the different file types. It is possible to output the alignment in all file
74  * types at the same time.
75  */
76 void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq)
77 {
78     int length;  
79     int firstRes; // starting sequence range - Ramu
80     int lastRes; // ending sequence range 
81     bool rangeOK;
82     
83     if((firstSeq <= 0) || (lastSeq < firstSeq))
84     {
85         utilityObject->error("Cannot produce alignment output."
86                 " Incorrect call to createAlignmentOutput."
87                 " firstSeq = %d"
88                 " lastSeq = %d\n", firstSeq, lastSeq);
89         return;
90     }
91     
92     length = 0;
93     firstRes = 1;
94     lastRes = 0;
95     rangeOK = false;
96     
97     try
98     {
99         length = alignPtr->getLengthLongestSequence();
100         lastRes = length;
101
102         if (userParameters->getRangeFromToSet()) 
103         {
104             firstRes = userParameters->getRangeFrom();
105             lastRes = userParameters->getRangeTo();
106             // Check if the numbers are ok.
107             if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1)) 
108             {
109                 cerr << "seqrange numbers are not set properly, using default....\n";
110                 firstRes = 1;
111                 lastRes = length;
112             }
113             else
114             {
115                 rangeOK = true;
116             }
117         }
118         if (rangeOK && (lastRes > length)) 
119         {
120             lastRes = length;
121             cout << "Seqrange " << lastRes << " is more than the " << length 
122                  << "  setting it to " << length << " \n";
123         }
124
125         if (userParameters->getMenuFlag())
126         {    
127             cout << "Consensus length = " << lastRes << " \n";
128         }
129
130         outputRegion partToOutput;
131         partToOutput._firstSeq = firstSeq;
132         partToOutput._lastSeq = lastSeq;
133         partToOutput._firstRes = firstRes;
134         partToOutput._lastRes = lastRes;
135
136         if(userParameters->getOutputClustal()) 
137         {
138             if(clustalOutFile.get() != 0 && clustalOutFile->is_open())
139             {
140                 clustalOut(alignPtr, partToOutput);
141                 if(clustalOutFile->is_open())
142                 {
143                     clustalOutFile->close();
144                 }
145                 utilityObject->info("CLUSTAL-Alignment file created  [%s]\n",
146                                     clustalOutName.c_str());
147             }
148         }
149         if(userParameters->getOutputNbrf())  
150         {
151             if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open())
152             {            
153                 nbrfOut(alignPtr, partToOutput);
154                 if(nbrfOutFile->is_open())
155                 {
156                     nbrfOutFile->close();
157                 }
158                 utilityObject->info("NBRF/PIR-Alignment file created  [%s]\n",
159                                     nbrfOutName.c_str());                
160             }
161         }
162         if(userParameters->getOutputGCG())  
163         {
164             if(gcgOutFile.get() != 0 && gcgOutFile->is_open())
165             {
166                 gcgOut(alignPtr, partToOutput);
167                 if(gcgOutFile->is_open())
168                 {
169                     gcgOutFile->close();
170                 }
171                 utilityObject->info("GCG-Alignment file created      [%s]\n",
172                                     gcgOutName.c_str());                
173             }
174         }
175         if(userParameters->getOutputPhylip())  
176         {
177             if(phylipOutFile.get() != 0 && phylipOutFile->is_open())
178             {
179                 phylipOut(alignPtr, partToOutput);
180                 if(phylipOutFile->is_open())
181                 {
182                     phylipOutFile->close();
183                 }
184                 utilityObject->info("PHYLIP-Alignment file created   [%s]\n",
185                                     phylipOutName.c_str());                
186             }
187         }
188         
189         if(userParameters->getOutputGde())  
190         {
191             if(gdeOutFile.get() != 0 && gdeOutFile->is_open())
192             {
193                 gdeOut(alignPtr, partToOutput);
194                 if(gdeOutFile->is_open())
195                 {
196                     gdeOutFile->close();
197                 }
198                 utilityObject->info("GDE-Alignment file created      [%s]\n",
199                                     gdeOutName.c_str());                
200             }
201         }
202         
203         if(userParameters->getOutputNexus())  
204         {
205             if(nexusOutFile.get() != 0 && nexusOutFile->is_open())
206             {
207                 nexusOut(alignPtr, partToOutput);
208                 if(nexusOutFile->is_open())
209                 {
210                     nexusOutFile->close();
211                 }
212                 utilityObject->info("NEXUS-Alignment file created    [%s]\n",
213                                     nexusOutName.c_str());                
214             }
215         }
216
217         if(userParameters->getOutputFasta())  
218         {
219             if(fastaOutFile.get() != 0 && fastaOutFile->is_open())
220             {
221                 fastaOut(alignPtr, partToOutput);
222                 if(fastaOutFile->is_open())
223                 {
224                     fastaOutFile->close();
225                 }
226                 utilityObject->info("Fasta-Alignment file created    [%s]\n",
227                                     fastaOutName.c_str());                
228             }
229         }
230         
231         // Output the alignment to the screen if it is required.
232         if (userParameters->getShowAlign() && userParameters->getMenuFlag()) 
233         {
234             showAlign();
235         }
236     }
237     catch(const exception& e)
238     {
239         cerr << "An exception has occured in the createAlignmentOutput function.\n"
240              << e.what() << "\n";
241         exit(1);    
242     }
243 }
244
245 bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames)
246 {
247     if(!userParameters->getOutputClustal() && 
248        !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
249        !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
250        !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) 
251     {
252          utilityObject->error("You must select an alignment output format\n");
253          return false;
254     }
255         
256     if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == ""
257        && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == ""
258        && fileNames.phylipFile == "")
259     {
260         utilityObject->error("No names for output files. Cannot output alignment.\n");
261         return false;
262     }
263     
264     if(fileNames.clustalFile != "")
265     {
266         clustalOutName = fileNames.clustalFile;
267         if(!openExplicitFile(clustalOutFile, clustalOutName))
268         { 
269             return false;
270         }
271     }  
272     if(fileNames.fastaFile != "")
273     {
274         fastaOutName = fileNames.fastaFile;
275         if(!openExplicitFile(fastaOutFile, fastaOutName))
276         { 
277             return false;
278         }
279     }
280     if(fileNames.gcgFile != "")
281     {
282         gcgOutName = fileNames.gcgFile;
283         if(!openExplicitFile(gcgOutFile, gcgOutName))
284         { 
285             return false;
286         }
287     }
288     if(fileNames.gdeFile != "")
289     {
290         gdeOutName = fileNames.gdeFile;
291         if(!openExplicitFile(gdeOutFile, gdeOutName))
292         { 
293             return false;
294         }
295     }
296     if(fileNames.nexusFile != "")
297     {
298         nexusOutName = fileNames.nexusFile;
299         if(!openExplicitFile(nexusOutFile, nexusOutName))
300         { 
301             return false;
302         }
303     }
304     if(fileNames.nrbfFile != "")
305     {
306         nbrfOutName = fileNames.nrbfFile;
307         if(!openExplicitFile(nbrfOutFile, nbrfOutName))
308         { 
309             return false;
310         }
311     }
312     if(fileNames.phylipFile != "")
313     {
314         phylipOutName = fileNames.phylipFile;
315         if(!openExplicitFile(phylipOutFile, phylipOutName))
316         { 
317             return false;
318         }
319     } 
320     return true;                         
321 }
322 /*
323  * The function openAlignmentOutput opens a file for output. It returns true if it
324  * has been successful and false if it has not been successful
325  */
326 bool AlignmentOutput::openAlignmentOutput(string path)
327 {
328     if(!userParameters->getOutputClustal() && 
329        !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
330        !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
331        !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) 
332     {
333          utilityObject->error("You must select an alignment output format\n");
334          return false;
335     }
336     string _fileNameToOutput = path;
337     if(_fileNameToOutput == "")
338     {
339       /*_fileNameToOutput = userParameters->getSeqName();*/
340       /* BUG 166 , file extension, FS, 2009-01-26 */
341       utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput);
342     }
343     
344     if(userParameters->getOutputClustal()) 
345     {
346         if (userParameters->getOutfileName() != "") 
347         {
348             clustalOutName = userParameters->getOutfileName();
349             if(!openExplicitFile(clustalOutFile, clustalOutName))
350             { 
351                 return false;
352             }
353         }
354         else 
355         {
356             clustalOutName = openOutputFile(clustalOutFile, 
357                              "\nEnter a name for the CLUSTAL output file ",
358                              _fileNameToOutput, "aln");
359                              
360             if(clustalOutName == "")
361             {
362                 return false; // We have not been successful.
363             }
364         }
365     }
366
367     if(userParameters->getOutputNbrf()) 
368     {
369         if (userParameters->getOutfileName() != "") 
370         {
371             nbrfOutName = userParameters->getOutfileName();
372             if(!openExplicitFile(nbrfOutFile, nbrfOutName))
373             { 
374                 return false;
375             }            
376         }
377         else
378         {
379             nbrfOutName = openOutputFile(nbrfOutFile,
380                       "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput,
381                        "pir");
382                       
383             if(nbrfOutName == "")
384             {
385                 return false; // We have not been successful.
386             }
387         }
388     }
389
390     if(userParameters->getOutputGCG()) 
391     {
392         if (userParameters->getOutfileName() != "") 
393         {
394             gcgOutName = userParameters->getOutfileName();
395             if(!openExplicitFile(gcgOutFile, gcgOutName))
396             { 
397                 return false;
398             }   
399         }
400         else
401         {
402             gcgOutName = openOutputFile(gcgOutFile,
403                       "\nEnter a name for the GCG output file", _fileNameToOutput, "msf");
404                       
405             if(gcgOutName == "")
406             {
407                 return false; // We have not been successful.
408             }
409         }
410     }
411
412     if(userParameters->getOutputPhylip()) 
413     {
414         if (userParameters->getOutfileName() != "") 
415         {
416             phylipOutName = userParameters->getOutfileName();
417             if(!openExplicitFile(phylipOutFile, phylipOutName))
418             { 
419                 return false;
420             }
421         }
422         else
423         {
424             phylipOutName = openOutputFile(phylipOutFile,
425                       "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy");
426                       
427             if(phylipOutName == "")
428             {
429                 return false; // We have not been successful.
430             }
431         }
432     }
433
434     if(userParameters->getOutputGde()) 
435     {
436         if (userParameters->getOutfileName() != "") 
437         {
438             gdeOutName = userParameters->getOutfileName();
439             if(!openExplicitFile(gdeOutFile, gdeOutName))
440             { 
441                 return false;
442             }
443         }
444         else
445         {
446             gdeOutName = openOutputFile(gdeOutFile, 
447                              "\nEnter a name for the GDE output file     ",
448                              _fileNameToOutput, "gde");
449                  
450             if(gdeOutName == "")
451             {
452                 return false; // We have not been successful.
453             }                       
454         }
455     }
456
457     if(userParameters->getOutputNexus()) 
458     {
459         if (userParameters->getOutfileName() != "") 
460         {
461             nexusOutName = userParameters->getOutfileName();
462             if(!openExplicitFile(nexusOutFile, nexusOutName))
463             { 
464                 return false;
465             }
466         }
467         else
468         {
469             nexusOutName = openOutputFile(nexusOutFile, 
470                              "\nEnter a name for the NEXUS output file   ",
471                              _fileNameToOutput, "nxs");
472                  
473             if(nexusOutName == "")
474             {
475                 return false; // We have not been successful.
476             }                       
477         }
478     }
479    
480     if(userParameters->getOutputFasta()) 
481     {
482         if (userParameters->getOutfileName() != "") 
483         {
484             fastaOutName = userParameters->getOutfileName();
485             if(!openExplicitFile(fastaOutFile, fastaOutName))
486             { 
487                 return false;
488             }
489         }
490         else
491         {
492             fastaOutName = openOutputFile(fastaOutFile, 
493                              "\nEnter a name for the Fasta output file ",
494                              _fileNameToOutput, "fasta");
495                  
496             if(fastaOutName == "")
497             {
498                 return false; // We have not been successful.
499             }
500         }
501     }
502   
503     return true;
504
505 }
506
507
508 /*
509  * The function openExplicitFile is used to open a file when we have the name already.
510  * It returns true if the file has been opened correctly, false otherwise.
511  */
512 bool AlignmentOutput::openExplicitFile(auto_ptr<ofstream>& outFile, string fileName)
513 {
514     if (fileName == "") 
515     {
516         cerr << "Bad output file [" << fileName << "]\n";
517         utilityObject->error("Bad output file [%s]\n", fileName.c_str());
518         return false;
519     }
520     
521     outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc));
522     if(!outFile->is_open()) 
523     {
524         utilityObject->error("Cannot open output file [%s]\n", fileName.c_str());
525         return false;
526     }
527     return true;
528 }
529
530 /*
531  * The function openOutputFile is used to open a file when we dont have the name yet.
532  * This function returns the name if it has been successful. If it has not been 
533  * successful it returns a blank string ""
534  */
535 string AlignmentOutput::openOutputFile(auto_ptr<ofstream>& outFile, string prompt, 
536                                        string path, string fileExtension)
537 {
538     string temp;
539     string _fileName; // Will return this name.
540     string message;
541     _fileName = path + fileExtension;
542
543     if(_fileName.compare(userParameters->getSeqName()) == 0) 
544     {
545         cout << "Output file name is the same as input file.\n";
546         if (userParameters->getMenuFlag()) 
547         {
548             message = "\n\nEnter new name to avoid overwriting  [" + _fileName + "]";
549
550             utilityObject->getStr(message, temp);
551             if(temp != "")
552             {
553                 _fileName = temp;
554             }
555         }
556     }
557     else if (userParameters->getMenuFlag()) 
558     {
559
560         message = prompt + " [" + _fileName + "]";
561         utilityObject->getStr(message, temp);
562         if(temp != "")
563         {
564             _fileName = temp;
565         }
566     }
567
568     outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc));
569     if(!outFile->is_open()) 
570     {
571         utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str());
572         return "";
573     }    
574     return _fileName;
575 }
576
577 /*
578  * fastaOut: output the alignment in FASTA format
579  */
580 void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput)
581 {
582     char residue;
583     int val;
584     int i, ii;
585     int j, slen;    
586     int lineLength;
587     int firstRes = partToOutput._firstRes;
588     int lastRes = partToOutput._lastRes;
589     int firstSeq = partToOutput._firstSeq;
590     int lastSeq = partToOutput._lastSeq;
591     rangeNum rnum;  
592     cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n";
593     try
594     {
595         const SeqArray* alignment = alignPtr->getSeqArray(); //NOTE june29
596         vector<char> sequence;
597         sequence.assign(lastRes + 1, '0');     
598     
599         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
600                 
601         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
602         if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
603         {    
604             lineLength = LINELENGTH;
605         }
606
607
608         for(ii = firstSeq; ii <= lastSeq; ii++) 
609         {
610             i = alignPtr->getOutputIndex(ii - 1);
611             slen = 0;
612             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
613             for(j = firstRes; j <= lastRes; j++) //- nige
614             {
615                 if (j <= alignPtr->getSeqLength(i))
616                 {
617                     //val = alignPtr.getResidue(i, j);
618                     val = (*alignment)[i][j]; // NOTE june29
619                 }
620                 else
621                 { 
622                     val = -3;
623                 }
624                 if((val == -3) || (val == 253))
625                 {
626                     break;
627                 }
628                 else if((val < 0) || (val > userParameters->getMaxAA())) 
629                 {
630                     residue = '-';
631                 }
632                 else 
633                 {
634                     residue = userParameters->getAminoAcidCode(val);
635                 }
636                 if (userParameters->getLowercase())
637                 {    
638                     sequence[j - firstRes] = residue;
639                 }
640                 else
641                 {
642                     sequence[j - firstRes] = residue;
643                 }
644                 slen++;
645             }
646             (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i));
647             if(userParameters->getSeqRange()) 
648             {
649                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
650                 (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end;
651                 
652             }
653             (*fastaOutFile) << "\n";
654             for(j = 1; j <= slen; j++) 
655             {
656                 (*fastaOutFile) << sequence[j-1];
657                 if((j % lineLength == 0) || (j == slen))
658                 { 
659                     (*fastaOutFile) << "\n";
660                 }
661             }
662         }
663         fastaOutFile->close();
664         cout << "FASTA file created!\n";
665     }
666     catch(const bad_alloc& e)
667     {
668         fastaOutFile->close();
669         cerr << "A bad_alloc exception has occured in the fastaOut function.\n"
670              << e.what() << "\n";
671         exit(1);
672     }
673     catch(VectorOutOfRange e)
674     {
675         fastaOutFile->close();
676         cerr << "An exception has occured in the fastaOut function.\n"
677              << e.what() << "\n";
678         exit(1);    
679     }
680     catch(...)
681     {
682         fastaOutFile->close();
683         cerr << "An exception has occured in the fastaOut function.\n";
684         exit(1);    
685     }
686
687 }
688
689 /* 
690  * gcgOut: output the alignment in gcg file format
691  */
692 void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput)
693 {
694     char residue;
695     int val;
696     int i, ii, chunks, block;
697     int j, k, pos1, pos2;    
698     long grandChecksum;
699  
700     try
701     {  
702         int firstRes = partToOutput._firstRes;
703         int lastRes = partToOutput._lastRes;
704         int firstSeq = partToOutput._firstSeq;
705         int lastSeq = partToOutput._lastSeq;    
706         rangeNum rnum;
707         vector<char> sequence;
708         vector<int> allChecks;
709         int _maxLength = alignPtr->getMaxAlnLength();
710         sequence.assign(_maxLength + 1, '0');
711         allChecks.assign(lastSeq + 1, 0);
712         const SeqArray* alignment = alignPtr->getSeqArray();
713         
714         for(i = firstSeq; i <= lastSeq; i++) 
715         {
716             //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige
717             for(j = firstRes; j <= lastRes; j++) //- nige
718             {
719                 if (j <= alignPtr->getSeqLength(i))
720                 {
721                     //val = alignPtr.getResidue(i, j);
722                     val = (*alignment)[i][j]; // NOTE june29
723                 }
724                 else
725                 { 
726                     val = -3;
727                 }
728                 if((val == -3) || (val == 253)) 
729                 {
730                     break;
731                 }
732                 else if((val < 0) || (val > userParameters->getMaxAA()))
733                 {
734                     residue = '.';
735                 }
736                 else 
737                 {
738                     residue = userParameters->getAminoAcidCode(val);
739                 }
740                 sequence[j - firstRes + 1] = residue;
741             }
742             // pad any short sequences with gaps, to make all sequences the same length 
743             for(; j <= firstRes + lastRes - 1; j++)
744             { 
745                 sequence[j - firstRes + 1] = '.';
746             }
747             allChecks[i] = SeqGCGCheckSum(&sequence, lastRes);
748         }    
749         int _index;
750         grandChecksum = 0;
751         for(i = 1; i <= alignPtr->getNumSeqs(); i++)
752         {
753             _index = alignPtr->getOutputIndex(i - 1);
754             grandChecksum += allChecks[_index];
755         }
756         grandChecksum = grandChecksum % 10000;
757         
758         (*gcgOutFile) << "PileUp\n\n";
759         (*gcgOutFile) << "\n\n   MSF:" << setw(5) << lastRes << "  Type: ";
760                         
761         if(userParameters->getDNAFlag())
762         {
763             (*gcgOutFile) << "N";
764         }
765         else
766         {
767             (*gcgOutFile) << "P";
768         }
769
770         (*gcgOutFile) << "    Check:" << setw(6) << grandChecksum << "   .. \n\n";
771         float _seqWeight;
772
773         int length = lastRes - firstRes + 1; //- nige
774
775         for(ii = firstSeq; ii <= lastSeq; ii++)  
776         {
777             i = alignPtr->getOutputIndex(ii - 1);
778             _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR;
779
780             (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo  Len:"
781                         //<< setw(5) << lastRes << "  Check:" << setw(6) << allChecks[i]  //- nige
782                           << setw(5) << length << "  Check:" << setw(6) << allChecks[i] 
783                           << "  Weight:  " << fixed << setprecision(1) << _seqWeight << "\n";
784         }  
785         (*gcgOutFile) << "\n//\n";
786         
787         chunks = length / GCG_LINELENGTH; //- nige
788         if(length % GCG_LINELENGTH != 0)  //- nige
789         {
790             ++chunks;
791         }
792   
793         for(block = 1; block <= chunks; block++) 
794         {
795             (*gcgOutFile) << "\n\n";
796             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
797             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige
798             
799             for(ii = firstSeq; ii <= lastSeq; ii++) 
800             {
801                 i = alignPtr->getOutputIndex(ii - 1);
802                 if (!userParameters->getSeqRange()) 
803                 {
804                     (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left
805                                   << alignPtr->getName(i) << " ";
806                 }
807                 else 
808                 {
809                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
810                     std::stringstream ss;
811                     std::stringstream ss2;
812                     string rangeStart;
813                     string rangeEnd;
814                     ss << rnum.start;
815                     ss >> rangeStart;
816                     ss2 << rnum.end;
817                     ss2 >> rangeEnd;
818                     string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart;
819                     nameAndRange += "-" + rangeEnd;
820                     (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left
821                                   << nameAndRange;
822                 }
823                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
824                 {
825             
826                     //JULIE -
827                     //check for int sequences - pad out with '.' characters to end 
828             
829                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
830                     {
831                         //val = alignPtr.getResidue(i, j + firstRes - 1);
832                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
833                     }
834                     else
835                     {
836                         val = -3;
837                     }
838                     if((val == -3) || (val == 253))
839                     {
840                         residue = '.';
841                     }
842                     else if((val < 0) || (val > userParameters->getMaxAA()))
843                     {
844                         residue = '.';
845                     }
846                     else 
847                     {
848                         residue = userParameters->getAminoAcidCode(val);
849                     }
850                     (*gcgOutFile) << residue;
851                     if(j % 10 == 0)
852                     { 
853                         (*gcgOutFile) << " ";
854                     }
855                 }
856             }
857         }
858         (*gcgOutFile) << "\n\n";
859         gcgOutFile->close();   
860     }
861     catch(const bad_alloc& e)
862     {
863         gcgOutFile->close();
864         cerr << "A bad_alloc exception has occured in the gcgOut function.\n"
865              << e.what() << "\n";
866         exit(1);
867     }
868     catch(VectorOutOfRange e)
869     {
870         gcgOutFile->close();
871         cerr << "An exception has occured in the gcgOut function.\n"
872              << e.what() << "\n";
873         exit(1);    
874     }
875     catch(...)
876     {
877         gcgOutFile->close();
878         cerr << "An exception has occured in the gcgOut function.\n";
879         exit(1);    
880     }    
881 }
882
883 void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput)
884 {
885     int firstRes = partToOutput._firstRes;
886     int lastRes = partToOutput._lastRes;
887     int firstSeq = partToOutput._firstSeq;
888     int lastSeq = partToOutput._lastSeq;    
889     try
890     {
891         char residue;
892         int val;
893         int i, ii, chunks, block;    
894         int j, k, pos1, pos2;    
895         int nameLen;
896         bool warn;
897         vector<string> _seqNames;
898         const SeqArray* alignment = alignPtr->getSeqArray();
899         rangeNum rnum;
900
901         // Push on a blank string. This is because the index starts at 1 for the seqs etc..
902         _seqNames.push_back("");
903
904         nameLen = 0;
905     
906         for(i = firstSeq; i <= lastSeq; i++)  
907         {
908             _seqNames.push_back(alignPtr->getName(i));
909             ii = _seqNames.size();
910             if(nameLen < ii) 
911             {
912                 nameLen = ii;
913             }
914         }
915         if(nameLen > 10) 
916         {
917             warn = false;
918             for(i = 0; i < (int)_seqNames.size() - 1; i++)  
919             {
920                 for(j = i + 1; j < (int)_seqNames.size(); j++) 
921                 {
922                     if (_seqNames[i].compare(_seqNames[j]) == 0) 
923                     {
924                         warn = true;
925                     }
926                 }
927             }
928             if(warn)
929             {
930                 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous.");
931             }
932             else
933             {
934                 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.");
935             }
936         }
937   
938         int length = lastRes - firstRes + 1; //- nige
939
940         chunks = length / GCG_LINELENGTH; //- nige
941         if(length % GCG_LINELENGTH != 0)  //- nige
942         {
943             ++chunks;
944         }
945   
946         (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " "
947                          << setw(6) << length; //-nige
948         
949         for(block = 1; block <= chunks; block++) 
950         {
951             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
952             //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige
953             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;
954             for(ii = firstSeq; ii <= lastSeq; ii++)  
955             {
956                 i = alignPtr->getOutputIndex(ii - 1);
957                 if(block == 1)  
958                 {
959                     if(!userParameters->getSeqRange()) 
960                     {
961                         string name = alignPtr->getName(i);
962                         (*phylipOutFile) << "\n" << setw(10) << left 
963                                              << name.substr(0,10) << " ";
964                     }
965                     else
966                     {
967                         findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
968                         std::stringstream ss;
969                         std::stringstream ss2;
970                         string rangeStart;
971                         string rangeEnd;
972                         ss << rnum.start;
973                         ss >> rangeStart;
974                         ss2 << rnum.end;
975                         ss2 >> rangeEnd;
976                         string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
977                         nameAndRange += rangeStart + "-" + rangeEnd;
978                         (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
979                                          << left
980                                          << nameAndRange;
981                     }
982                 }
983                 else
984                 {
985                     (*phylipOutFile) << "\n           ";
986                 }
987                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
988                 {
989                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
990                     {
991                         //val = alignPtr.getResidue(i, j + firstRes - 1);
992                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
993                     }
994                     else
995                     {
996                         val = -3;
997                     }
998                     if((val == -3) || (val == 253))
999                     {
1000                         break;
1001                     }
1002                     else if((val < 0) || (val > userParameters->getMaxAA()))
1003                     {
1004                         residue = '-';
1005                     }
1006                     else 
1007                     {
1008                         residue = userParameters->getAminoAcidCode(val);
1009                     }
1010                     (*phylipOutFile) << residue;
1011                     if(j % 10 == 0) 
1012                     {
1013                         (*phylipOutFile) << " ";
1014                     }
1015                 }
1016             }
1017             (*phylipOutFile) << "\n";
1018         }  
1019     }
1020     catch(const bad_alloc& e)
1021     {
1022         phylipOutFile->close();
1023         cerr << "A bad_alloc exception has occured in the phylipOut function.\n"
1024              << e.what() << "\n";
1025         exit(1);
1026     }
1027     catch(VectorOutOfRange e)
1028     {
1029         phylipOutFile->close();
1030         cerr << "An exception has occured in the phylipOut function.\n"
1031              << e.what() << "\n";
1032         exit(1);    
1033     }
1034     catch(...)
1035     {
1036         phylipOutFile->close();
1037         cerr << "An exception has occured in the phylipOut function.\n";
1038         exit(1);    
1039     }      
1040 }
1041
1042 void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput)
1043 {
1044     int firstRes = partToOutput._firstRes;
1045     int lastRes = partToOutput._lastRes;
1046     int firstSeq = partToOutput._firstSeq;
1047     int lastSeq = partToOutput._lastSeq;    
1048     try
1049     {
1050         char residue;
1051         int val;
1052         int i, ii, chunks, block;    
1053         int j, k, pos1, pos2;    
1054         const SeqArray* alignment = alignPtr->getSeqArray();
1055         rangeNum rnum;
1056
1057         int length = lastRes - firstRes + 1; //- nige
1058
1059         chunks = length / GCG_LINELENGTH; //- nige
1060         if(length % GCG_LINELENGTH != 0) //- nige
1061         {
1062             ++chunks;
1063         }
1064   
1065         (*nexusOutFile) << "#NEXUS\n";
1066         (*nexusOutFile) << "BEGIN DATA;\n";
1067         (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs() 
1068                         << " nchar=" << length << ";\n"; //- nige
1069         (*nexusOutFile) << "format missing=?\n";
1070
1071         // removed - bugzilla bug 204
1072         /*
1073         (*nexusOutFile) << "symbols=\"";
1074         
1075         for(i = 0; i <= userParameters->getMaxAA(); i++)
1076         {
1077             (*nexusOutFile) << userParameters->getAminoAcidCode(i);
1078         }
1079         (*nexusOutFile) << "\"\n";
1080         */
1081
1082         (*nexusOutFile) << "interleave datatype=";
1083         bool _dnaFlag = userParameters->getDNAFlag();
1084         string _type = _dnaFlag ? "DNA " : "PROTEIN ";
1085         (*nexusOutFile) << _type;
1086         (*nexusOutFile) << "gap= -;\n";
1087         (*nexusOutFile) << "\nmatrix";
1088   
1089         for(block = 1; block <= chunks; block++) 
1090         {
1091             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
1092             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; //- nige
1093             for(ii = firstSeq; ii <= lastSeq; ii++)  
1094             {
1095                 i = alignPtr->getOutputIndex(ii - 1);
1096                 if (!userParameters->getSeqRange()) 
1097                 {
1098                     (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left
1099                                     << alignPtr->getName(i) << " ";
1100                 }
1101                 else 
1102                 {
1103                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1104                     
1105                     std::stringstream ss;
1106                     std::stringstream ss2;
1107                     string rangeStart;
1108                     string rangeEnd;
1109                     ss << rnum.start;
1110                     ss >> rangeStart;
1111                     ss2 << rnum.end;
1112                     ss2 >> rangeEnd;
1113                     string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
1114                     nameAndRange += rangeStart + "-" + rangeEnd;
1115                     (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
1116                                     << left
1117                                     << nameAndRange;                    
1118                 }
1119                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
1120                 {
1121                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1122                     {
1123                         //val = alignPtr.getResidue(i, j + firstRes - 1);
1124                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1125                     }
1126                     else
1127                     {
1128                         val = -3;
1129                     }
1130                     if((val == -3) || (val == 253))
1131                     {
1132                         break;
1133                     }
1134                     else if((val < 0) || (val > userParameters->getMaxAA()))
1135                     {
1136                         residue = '-';
1137                     }
1138                     else 
1139                     {
1140                         residue = userParameters->getAminoAcidCode(val);
1141                     }
1142                     (*nexusOutFile) << residue;
1143                 }
1144             }
1145             (*nexusOutFile) << "\n";
1146         }
1147         (*nexusOutFile) << ";\nend;\n";   
1148     }
1149     catch(const bad_alloc& e)
1150     {
1151         nexusOutFile->close();
1152         cerr << "A bad_alloc exception has occured in the nexusOut function.\n"
1153              << e.what() << "\n";
1154         exit(1);
1155     }
1156     catch(VectorOutOfRange e)
1157     {
1158         nexusOutFile->close();
1159         cerr << "An exception has occured in the nexusOut function.\n"
1160              << e.what() << "\n";
1161         exit(1);    
1162     }
1163     catch(...)
1164     {
1165         nexusOutFile->close();
1166         cerr << "An exception has occured in the nexusOut function.\n";
1167         exit(1);    
1168     }      
1169 }
1170
1171 /*
1172  * gdeOut: print out the alignment in gde file format.
1173  */
1174 void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput)
1175 {
1176     int firstRes = partToOutput._firstRes;
1177     int lastRes = partToOutput._lastRes;
1178     int firstSeq = partToOutput._firstSeq;
1179     int lastSeq = partToOutput._lastSeq;    
1180
1181     int length = lastRes - firstRes + 1; //- nige
1182
1183     try
1184     {
1185         char residue;
1186         int val;
1187         vector<char> _ssMask1, _ssMask2, seq;
1188         int i, ii;
1189         int j, slen;    
1190         int lineLength;
1191         const SeqArray* alignment = alignPtr->getSeqArray();
1192         rangeNum rnum;
1193
1194         seq.assign(alignPtr->getMaxAlnLength() + 1, '0');
1195         
1196         // decide the line length for this alignment - maximum is LINELENGTH 
1197         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1198         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1199         if (lineLength > LINELENGTH || lineLength <= 0) 
1200         {
1201             lineLength = LINELENGTH;
1202         }
1203   
1204         // If we are using the secondary structure info from profile 1, set it up
1205         if (userParameters->getStructPenalties1() == SECST && 
1206             userParameters->getUseSS1() == true) 
1207         {
1208             int _lengthSeq1 = alignPtr->getSeqLength(1);
1209             vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1210             _ssMask1.assign(_lengthSeq1 + 10, 0);
1211             
1212             for (i = 0;i < _lengthSeq1;i++)
1213             {
1214                 _ssMask1[i] = _secStructMask1->at(i);
1215             }
1216             printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1);
1217         }
1218         
1219         // If we are using the secondary structure info from profile 2, set it up
1220         if (userParameters->getStructPenalties2() == SECST && 
1221             userParameters->getUseSS2() == true) 
1222         {
1223             int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1224             int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1225             _ssMask2.assign(lengthSeqProfile2 + 10, 0);
1226             vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1227             
1228             for (i=0;i < lengthSeqProfile2;i++)
1229             {
1230                 _ssMask2[i] = _secStructMask2->at(i);
1231             }
1232             printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2);  
1233         }
1234
1235         bool _dnaFlag = userParameters->getDNAFlag();
1236         string _prefix = _dnaFlag ? "#" : "%";
1237         
1238         for(ii = firstSeq; ii <= lastSeq; ii++) 
1239         {
1240             i = alignPtr->getOutputIndex(ii - 1);
1241             
1242             (*gdeOutFile) << _prefix;
1243             if(!userParameters->getSeqRange()) 
1244             {
1245                 (*gdeOutFile) << alignPtr->getName(i) << "\n";
1246             }
1247             else 
1248             {
1249                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1250                 (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/"
1251                               << rnum.start << "-" << rnum.end << "\n";
1252             }
1253             slen = 0;
1254
1255             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1256             for(j = firstRes; j <= lastRes; j++) //- nige
1257             {
1258                 if (j <= alignPtr->getSeqLength(i))
1259                 {
1260                     //val = alignPtr.getResidue(i, j);
1261                     val = (*alignment)[i][j]; // NOTE june29
1262                 }
1263                 else
1264                 { 
1265                     val = -3;
1266                 }
1267                 if((val == -3) || (val == 253))
1268                 {
1269                     break;
1270                 }
1271                 else if((val < 0) || (val > userParameters->getMaxAA()))
1272                 {
1273                     residue = '-';
1274                 }
1275                 else 
1276                 {
1277                     residue = userParameters->getAminoAcidCode(val);
1278                 }
1279                 if (userParameters->getLowercase())
1280                 {
1281                     seq[j - firstRes] = (char)tolower((int)residue);
1282                 }
1283                 else
1284                 {
1285                     seq[j - firstRes] = residue;
1286                 }
1287                 slen++;
1288             }
1289             for(j = 1; j <= slen; j++) 
1290             {
1291                 (*gdeOutFile) << seq[j-1];
1292                 if((j % lineLength == 0) || (j == slen)) 
1293                 {
1294                     (*gdeOutFile) << "\n";
1295                 }
1296             }
1297         }
1298   
1299         if (userParameters->getOutputStructPenalties() == 0 || 
1300             userParameters->getOutputStructPenalties() == 2) 
1301         {
1302             if (userParameters->getStructPenalties1() == SECST && 
1303                 userParameters->getUseSS1() == true) 
1304             {
1305                 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1306                               << alignPtr->getSecStructName1() << "\n";
1307                               
1308                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1309                 for(i = firstRes; i <= lastRes; i++) //- nige
1310                 {
1311                     val = _ssMask1[i - 1];
1312                     if (val == userParameters->getGapPos1() || 
1313                         val == userParameters->getGapPos2())
1314                     {
1315                         seq[i - firstRes] = '-';
1316                     }
1317                     else
1318                     {
1319                         seq[i - firstRes] = val;
1320                     }
1321                 }
1322
1323                 for(i = 1; i <= lastRes; i++) 
1324                 {
1325                     (*gdeOutFile) << seq[i-1];
1326                     if((i % lineLength == 0) || (i == lastRes)) 
1327                     {
1328                         (*gdeOutFile) << "\n";
1329                     }
1330                 }
1331             }
1332     
1333             if (userParameters->getStructPenalties2() == SECST && 
1334                 userParameters->getUseSS2() == true) 
1335             {
1336                 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1337                               << alignPtr->getSecStructName2() << "\n";
1338                                               
1339                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1340                 for(i = firstRes; i <= lastRes; i++) //- nige
1341                 {
1342                     val = _ssMask2[i - 1];
1343                     if (val == userParameters->getGapPos1() || 
1344                         val == userParameters->getGapPos2())
1345                     {
1346                         seq[i - firstRes] = '-';
1347                     }
1348                     else
1349                     {
1350                         seq[i - firstRes] = val;
1351                     }
1352                 }
1353
1354                 //for(i = 1; i <= lastRes; i++) //- nige
1355                 for(i = 1; i <= length; i++) //- nige
1356                 {
1357                     (*gdeOutFile) << seq[i - 1];
1358                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1359                     if((i % lineLength == 0) || (i == length)) //- nige
1360                     {
1361                         (*gdeOutFile) << "\n";
1362                     }
1363                 }
1364             }
1365         }
1366         if (userParameters->getOutputStructPenalties() == 1 || 
1367             userParameters->getOutputStructPenalties() == 2) 
1368         {
1369             if (userParameters->getStructPenalties1() != NONE && 
1370                 userParameters->getUseSS1() == true) 
1371             {
1372                 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1373                               << alignPtr->getSecStructName1() << "\n";
1374                 
1375                 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();          
1376                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1377                 for(i = firstRes; i <= lastRes; i++) //- nige
1378                 {
1379                     val = _gapPenaltyMask1->at(i - 1);
1380                     if (val == userParameters->getGapPos1() || 
1381                         val == userParameters->getGapPos2())
1382                     {
1383                         seq[i - firstRes] = '-';
1384                     }
1385                     else
1386                     {
1387                         seq[i - firstRes] = val;
1388                     }
1389                 }
1390
1391                 //for(i = 1; i <= lastRes; i++) //- nige
1392                 for(i = 1; i <= length; i++) //- nige
1393                 {
1394                     (*gdeOutFile) << seq[i - 1];
1395                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1396                     if((i % lineLength == 0) || (i == length)) //- nige
1397                     {
1398                         (*gdeOutFile) << "\n";
1399                     }
1400                 }
1401             }
1402             if (userParameters->getStructPenalties2() != NONE && 
1403                 userParameters->getUseSS2() == true) 
1404             {
1405                 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1406                               << alignPtr->getSecStructName2() << "\n";
1407                 
1408                 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();    
1409                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1410                 for(i = firstRes; i < length; i++) //- nige
1411                 {
1412                     val = _gapPenaltyMask2->at(i-1);
1413                     if (val == userParameters->getGapPos1() || 
1414                         val == userParameters->getGapPos2())
1415                     {
1416                         seq[i - firstRes] = '-';
1417                     }
1418                     else
1419                     {
1420                         seq[i - firstRes] = val;
1421                     }
1422                 }
1423
1424                 //for(i = 1; i <= lastRes; i++) //- nige
1425                 for(i = 1; i <= length; i++) //- nige
1426                 {
1427                     (*gdeOutFile) << seq[i - 1];
1428                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1429                     if((i % lineLength == 0) || (i == length)) //- nige
1430                     {
1431                         (*gdeOutFile) << "\n";
1432                     }
1433                 }
1434             }
1435         }
1436         gdeOutFile->close();   
1437     }
1438     catch(const bad_alloc& e)
1439     {
1440         gdeOutFile->close();
1441         cerr << "A bad_alloc exception has occured in the gdeOut function.\n"
1442              << e.what() << "\n";
1443         exit(1);
1444     }
1445     catch(VectorOutOfRange e)
1446     {
1447         gdeOutFile->close();
1448         cerr << "An exception has occured in the gdeOut function.\n"
1449              << e.what() << "\n";
1450         exit(1);    
1451     }
1452     catch(...)
1453     {
1454         gdeOutFile->close();
1455         cerr << "An exception has occured in the gdeOut function.\n";
1456         exit(1);    
1457     }     
1458 }
1459
1460 void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput)
1461 {
1462     char residue;
1463     int val;
1464     int i, ii;
1465     int j, slen;    
1466     int lineLength;
1467     rangeNum rnum;
1468     int firstRes = partToOutput._firstRes;
1469     int lastRes = partToOutput._lastRes;
1470     int firstSeq = partToOutput._firstSeq;
1471     int lastSeq = partToOutput._lastSeq;
1472             
1473     try
1474     {
1475         int _maxLength = alignPtr->getMaxAlnLength();
1476         vector<char> sequence;
1477         sequence.assign(_maxLength + 1, '0');
1478         const SeqArray* alignment = alignPtr->getSeqArray();
1479         // decide the line length for this alignment - maximum is LINELENGTH 
1480         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1481         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1482         if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07
1483         {
1484             lineLength = LINELENGTH;
1485         }
1486         
1487         // Get name prefix. DL if DNA, P1 if protein
1488         string namePrefix;
1489         bool _dnaFlag = userParameters->getDNAFlag();
1490         namePrefix = _dnaFlag ? ">DL;" : ">P1;";
1491         
1492         //int length = lastRes - firstRes + 1; //- nige
1493
1494         for(ii = firstSeq; ii <= lastSeq; ii++) 
1495         {
1496             i = alignPtr->getOutputIndex(ii - 1);
1497             
1498             (*nbrfOutFile) << namePrefix;
1499             
1500             if (!userParameters->getSeqRange()) 
1501             {
1502                 (*nbrfOutFile) << alignPtr->getName(i) << "\n"
1503                                << alignPtr->getTitle(i) << "\n";
1504             }
1505             else 
1506             {
1507                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1508                 (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start
1509                                << "-"<< rnum.end << "\n"
1510                                << alignPtr->getTitle(i) << "\n";
1511             }
1512             slen = 0;
1513             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1514             for(j = firstRes; j <= lastRes; j++) //- nige
1515             {
1516                 // NOTE I changed this here!!!!!
1517                 if (j <= alignPtr->getSeqLength(i))
1518                 {
1519                     //val = alignPtr.getResidue(i, j);
1520                     val = (*alignment)[i][j]; // NOTE june29
1521                 }
1522                 else
1523                 { 
1524                     val = -3;
1525                 }
1526                 if((val == -3) || (val == 253))
1527                 {
1528                     break;
1529                 }
1530                 else if((val < 0) || (val > userParameters->getMaxAA()))
1531                 {
1532                     residue = '-';
1533                 }
1534                 else 
1535                 {
1536                     residue = userParameters->getAminoAcidCode(val);
1537                 }
1538                 sequence[j - firstRes] = residue;
1539                 slen++;
1540             }
1541             for(j = 1; j <= slen; j++) 
1542             {
1543                 (*nbrfOutFile) << sequence[j - 1];
1544                 if((j % lineLength == 0) || (j == slen))
1545                 { 
1546                     (*nbrfOutFile) << "\n";
1547                 }
1548             }
1549             (*nbrfOutFile) << "*\n";
1550         }    
1551         nbrfOutFile->close();
1552     }
1553     catch(const bad_alloc& e)
1554     {
1555         nbrfOutFile->close();
1556         cerr << "A bad_alloc exception has occured in the nbrfOut function.\n"
1557              << e.what() << "\n";
1558         exit(1);
1559     }
1560     catch(VectorOutOfRange e)
1561     {
1562         nbrfOutFile->close();
1563         cerr << "An exception has occured in the nbrfOut function.\n"
1564              << e.what() << "\n";
1565         exit(1);    
1566     }
1567     catch(...)
1568     {
1569         nbrfOutFile->close();
1570         cerr << "An exception has occured in the nbrfOut function.\n";
1571         exit(1);    
1572     }    
1573 }
1574
1575 /* 
1576  * The function clustalOut is used to ouput the Alignment into a clustal format
1577  * file.
1578  */
1579 void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput)
1580 {
1581     int firstRes = partToOutput._firstRes;
1582     int lastRes = partToOutput._lastRes;
1583     int firstSeq = partToOutput._firstSeq;
1584     int lastSeq = partToOutput._lastSeq;
1585         
1586     try
1587     {
1588         vector<char> seq1;    
1589         vector<int> seqNo;
1590         vector<int> printSeqNo;    
1591         vector<char> ss_mask1, ss_mask2;
1592         const SeqArray* alignment = alignPtr->getSeqArray();
1593         char  temp[MAXLINE];
1594         char c;
1595         int val;
1596         int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks;
1597         int i, j, k, l;
1598         int pos, ptr;
1599         int lineLength;
1600
1601         rangeNum rnum;
1602         string tmpStr = "";
1603         string sequenceLine = "";
1604
1605
1606         int _numSequences = alignPtr->getNumSeqs();
1607         // PMcG revert to Clustalw1.83 output format for name width
1608         const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY); 
1609
1610         seqNo.assign(_numSequences + 1, 0);
1611         printSeqNo.assign(_numSequences + 1, 0);
1612     
1613         // check that lastSeq <= _numSequences
1614         if(lastSeq > _numSequences)
1615         {
1616             lastSeq = _numSequences;
1617         }
1618     
1619         for (i = firstSeq; i <= lastSeq; i++)
1620         {
1621             printSeqNo[i] = seqNo[i] = 0;
1622             for(j = 1; j < firstRes; j++) 
1623             {
1624                 //val = alignPtr.getResidue(i, j);
1625                 val = (*alignment)[i][j]; // NOTE june29
1626                 if((val >= 0) || (val <= userParameters->getMaxAA()))
1627                 { 
1628                     seqNo[i]++;
1629                 }
1630             }
1631         }
1632
1633         seq1.assign(alignPtr->getMaxAlnLength() + 1, 0);
1634         // Check if we have secondary structure in file 1 and if we want to output it.
1635         if (userParameters->getStructPenalties1() == SECST && 
1636             userParameters->getUseSS1() == true) 
1637         {
1638             int lengthSeq1 = alignPtr->getSeqLength(1);
1639             ss_mask1.assign(lengthSeq1 + 10, 0);
1640             vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1641         
1642             for (i = 0; i < lengthSeq1; i++)
1643             {
1644                 ss_mask1[i] = _secStructMask1->at(i);
1645             }
1646             printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1);
1647           
1648         }
1649         
1650         // Check if we have secondary structure in file 2 and if we want to output it.
1651         if (userParameters->getStructPenalties2() == SECST && 
1652             userParameters->getUseSS2() == true &&
1653             userParameters->getProfile2Empty() == false)
1654         {
1655             // AW added test getProfile2Empty was meant to fix bug
1656             // 141, but only prevents segfault, output still faulty.
1657             // Has to be fixed somewhere else
1658             int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1659             int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1660             ss_mask2.assign(lengthSeqProfile2 + 10, 0);
1661             vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1662             for (i = 0; i < lengthSeqProfile2; i++)
1663             {
1664                 ss_mask2[i] = _secStructMask2->at(i);
1665             }
1666             printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2);
1667         }
1668     
1669         (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel() 
1670                           << " multiple sequence alignment\n\n";
1671     
1672         // decide the line length for this alignment - maximum is LINELENGTH
1673
1674         //PMcG  9-2-2008 make line output same as 1.83
1675         //lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1676         lineLength = PAGEWIDTH - NAMESWIDTH;
1677         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1678         if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
1679         { 
1680             lineLength = LINELENGTH;
1681         }
1682     
1683         int length = lastRes - firstRes + 1; //- nige
1684
1685         chunks = length / lineLength; //- nige
1686         if(length % lineLength != 0) //- nige
1687         {
1688           ++chunks;
1689         }
1690         //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n",
1691         //       firstRes, lastRes, length, chunks);
1692
1693         // This will loop through each of the blocks.
1694         for(lv1 = 1; lv1 <= chunks; ++lv1) 
1695         {
1696             // pos is begining of chunk, ptr is the end of the chunk to be displayed.
1697             pos = ((lv1 - 1) * lineLength) + 1; 
1698             ptr = (length < pos + lineLength - 1) ? length : pos + lineLength - 1; //- nige
1699    
1700             (*clustalOutFile) << "\n";
1701             int _outStructPenalty = userParameters->getOutputStructPenalties();
1702             
1703             string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07
1704             
1705             if((int)secStructName1.size() > MAXNAMESTODISPLAY)
1706             {
1707                 //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY);
1708                 secStructName1 = secStructName1.substr(0, NAMESWIDTH);
1709             }            
1710             if (_outStructPenalty == 0 || _outStructPenalty == 2) 
1711             {
1712                 if (userParameters->getStructPenalties1() == SECST && 
1713                     userParameters->getUseSS1() == true) 
1714                 {
1715                     for(i = pos; i <= ptr; ++i) 
1716                     {
1717                         // Check if we can access mask position first
1718                         if((int)ss_mask1.size() > i + firstRes - 2)  
1719                         {
1720                             val = ss_mask1[i + firstRes - 2]; 
1721                             if (val == userParameters->getGapPos1() || 
1722                                 val == userParameters->getGapPos2())
1723                             {
1724                                 temp[i - pos] = '-';
1725                             }
1726                             else
1727                             {
1728                                 temp[i - pos] = val;
1729                             }
1730                         }
1731                         else
1732                         {
1733                             break;
1734                         }
1735                     }
1736                     temp[i - pos] = EOS;  
1737                                       
1738                     if(userParameters->getSeqRange())
1739                     {
1740                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1741                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1742                                                             clusSecStructOffset)
1743                                           << left << secStructName1 
1744                                           << "  " << temp << "\n";
1745                     }
1746                     else
1747                     {
1748                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1749                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1750                                           << left << secStructName1 << "  " 
1751                                           << temp << "\n";
1752                     }
1753                 }
1754             }
1755             if (_outStructPenalty == 1 || _outStructPenalty == 2) 
1756             {
1757                 if (userParameters->getStructPenalties1() != NONE && 
1758                     userParameters->getUseSS1() == true) 
1759                 {
1760                     vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();       
1761                     for(i = pos; i <= ptr; ++i) 
1762                     {
1763                         if((int)_gapPenaltyMask1->size() > i + firstRes - 2) 
1764                         {
1765                             val = _gapPenaltyMask1->at(i + firstRes - 2); 
1766                             if (val == userParameters->getGapPos1() || 
1767                                 val == userParameters->getGapPos2())
1768                             {
1769                                 temp[i - pos] = '-';
1770                             }
1771                             else
1772                             {
1773                                 temp[i - pos] = val;
1774                             }
1775                         }
1776                         else
1777                         {
1778                             break;
1779                         }
1780                     }
1781                     temp[i - pos] = EOS;
1782                     
1783                     //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1784                     (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1785                                       << secStructName1 << "  "
1786                                       << temp << "\n";
1787                 }
1788             }
1789             
1790             
1791             string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07
1792             //if((int)secStructName2.size() > MAXNAMESTODISPLAY)
1793             if((int)secStructName2.size() > NAMESWIDTH)
1794             {
1795                 //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY);
1796                 secStructName2 = secStructName2.substr(0, NAMESWIDTH);
1797             }
1798                                 
1799             if (_outStructPenalty == 0 || _outStructPenalty == 2) 
1800             {
1801                 if (userParameters->getStructPenalties2() == SECST && 
1802                     userParameters->getUseSS2() == true) 
1803                 {
1804                     for(i = pos; i <= ptr; ++i) 
1805                     {
1806                         if((int)ss_mask2.size() > i + firstRes - 2)
1807                         {
1808                             val = ss_mask2[i + firstRes - 2]; 
1809                             if (val == userParameters->getGapPos1() || 
1810                                 val == userParameters->getGapPos2())
1811                             {
1812                                 temp[i - pos] = '-';
1813                             }
1814                             else
1815                             {
1816                                 temp[i - pos] = val;
1817                             }
1818                         }
1819                         else
1820                         {
1821                             break;
1822                         }
1823                     }
1824                     temp[i - pos] = EOS;
1825                     
1826                                         
1827                     if (userParameters->getSeqRange())
1828                     {
1829                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1830                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1831                                                             clusSecStructOffset)
1832                                           << left << secStructName2; 
1833                         (*clustalOutFile) << "  " << temp << "\n";
1834                     }
1835                     else
1836                     {
1837                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1838                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1839                                           << left << secStructName2 << "  " 
1840                                           << temp << "\n";
1841                     }
1842                 }
1843             }
1844             
1845             if (_outStructPenalty == 1 || _outStructPenalty == 2) 
1846             {
1847                 if (userParameters->getStructPenalties2() != NONE && 
1848                     userParameters->getUseSS2() == true) 
1849                 {
1850                     vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1851                     for(i = pos; i <= ptr; ++i) 
1852                     {
1853                         if((int)_gapPenaltyMask2->size() > i + firstRes - 2)
1854                         {
1855                             val = _gapPenaltyMask2->at(i + firstRes - 2); 
1856                             if (val == userParameters->getGapPos1() || 
1857                                 val == userParameters->getGapPos2())
1858                             {
1859                                 temp[i - pos] = '-';
1860                             }
1861                             else
1862                             {
1863                                 temp[i - pos] = val;
1864                             }
1865                         }
1866                         else
1867                         {
1868                             break;
1869                         }
1870                     }
1871                     temp[i - pos] = EOS;
1872                     
1873                     //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1874                     (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1875                                       << secStructName2 << "  "
1876                                       << temp << "\n";
1877                 }
1878             }
1879         
1880             for(ii = firstSeq; ii <= lastSeq; ++ii) 
1881             {
1882                 i = alignPtr->getOutputIndex(ii - 1); 
1883                 printSeqNo[i] = 0;
1884                 for(j = pos; j <= ptr; ++j) 
1885                 {
1886                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1887                     {
1888                         //val = alignPtr.getResidue(i, j + firstRes - 1);
1889                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1890                     }
1891                     else
1892                     { 
1893                         val = -3;
1894                     }
1895                     if((val == -3) || (val == 253)) 
1896                     {
1897                         break;
1898                     }
1899                     else if((val < 0) || (val > userParameters->getMaxAA()))
1900                     {
1901                         seq1[j] = '-';
1902                     }
1903                     else 
1904                     {
1905                         seq1[j] = userParameters->getAminoAcidCode(val);
1906                         seqNo[i]++;
1907                         printSeqNo[i] = 1;
1908                     } 
1909                 }
1910                 for(; j <= ptr; ++j) 
1911                 {
1912                     seq1[j]='-';
1913                 }
1914             
1915                 sequenceLine = "";
1916
1917                 for(int index = pos; index < ptr + 1; index++)
1918                 {
1919                     sequenceLine += seq1[index];
1920                 }
1921                 
1922                 string seqName = alignPtr->getName(i);
1923                 //if((int)seqName.size() > MAXNAMESTODISPLAY)
1924                 if((int)seqName.size() > NAMESWIDTH)
1925                 {
1926                     //seqName = seqName.substr(0, MAXNAMESTODISPLAY);
1927                     seqName = seqName.substr(0, NAMESWIDTH);
1928                 }
1929                 
1930                 if (!userParameters->getSeqRange()) 
1931                 {
1932                     // NOTE I made a change here from + 5, to + 6.
1933                     //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left
1934                     //                  << alignPtr->getName(i);
1935
1936                     //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left
1937                     //                  << seqName; 
1938                     // PMcG changed this to revert to behaviour of clustalw1.83
1939                     // 
1940                     //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6)
1941                     (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName; 
1942
1943
1944                 }
1945                 else // Put the sequence range information in! 
1946                 {
1947                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1948                     string nameOnly = nameonly(seqName);
1949                     std::stringstream ss;
1950                     std::stringstream ss2;
1951                     string rangeStart;
1952                     string rangeEnd;
1953                     ss << rnum.start;
1954                     ss >> rangeStart;
1955                     ss2 << rnum.end;
1956                     ss2 >> rangeEnd;
1957                     tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd;
1958                     //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset) 
1959                     (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset) 
1960                                       << left << tmpStr;
1961                 }
1962             
1963                 (*clustalOutFile) << sequenceLine;
1964             
1965                 if (userParameters->getClSeqNumbers() && printSeqNo[i])
1966                 {
1967                     (*clustalOutFile) << " " << seqNo[i];
1968                 }
1969             
1970                 (*clustalOutFile) << "\n";
1971             }
1972         
1973             // Now print out the conservation information!
1974             for(i = pos; i <= ptr; ++i) 
1975             {
1976                 seq1[i] = ' ';
1977                 ident = 0;
1978
1979                 for(j = 1; j <= (int)strongGroup.size(); j++)
1980                 {
1981                     catident1[j - 1] = 0;
1982                 }
1983                 for(j = 1; j <= (int)weakGroup.size(); j++) 
1984                 {
1985                     catident2[j - 1] = 0;
1986                 }
1987                 
1988                 for(j = firstSeq; j <= lastSeq; ++j) 
1989                 {
1990                     if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) &&
1991                        (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq)))
1992                     {// NOTE june29
1993                         if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) && 
1994                             ((*alignment)[firstSeq][i + firstRes - 1] <=
1995                             userParameters->getMaxAA())) 
1996                         {
1997                             // Count how many are identical to the first sequence
1998                             if((*alignment)[firstSeq][i + firstRes - 1] == 
1999                                 (*alignment)[j][i + firstRes - 1])
2000                             {
2001                                 ++ident;
2002                             }
2003                             // Count how many are in the same category.
2004                             for(k = 1; k <= (int)strongGroup.size(); k++) 
2005                             {
2006                                 for(l = 0; (c = strongGroup[k - 1][l]); l++) 
2007                                 {
2008                                     int resCode = (*alignment)[j][i + firstRes - 1];
2009                                     if(resCode <= userParameters->getMaxAA() + 1)
2010                                     {
2011                                         if (userParameters->getAminoAcidCode(resCode) == c)
2012                                         {
2013                                             catident1[k - 1]++;
2014                                             break;
2015                                         }
2016                                     }
2017                                 }
2018                             }
2019                             for(k = 1; k <= (int)weakGroup.size(); k++) 
2020                             {
2021                                 for(l = 0; (c = weakGroup[k - 1][l]); l++) 
2022                                 {//NOTE june29
2023                                     int resCode = (*alignment)[j][i + firstRes - 1];
2024                                     if(resCode <= userParameters->getMaxAA() + 1)
2025                                     {
2026                                         if (userParameters->getAminoAcidCode(resCode) == c)
2027                                         {
2028                                             catident2[k - 1]++;
2029                                             break;
2030                                         }
2031                                     }
2032                                 }
2033                             }
2034                         }
2035                     }    
2036                 }
2037             
2038                 // Now do the conservation part for each block. 
2039                 if(ident == lastSeq - firstSeq + 1)
2040                 {
2041                     seq1[i] = '*'; // All residues the same!
2042                 }
2043                 else if (!userParameters->getDNAFlag()) 
2044                 {
2045                     for(k = 1; k <= (int)strongGroup.size(); k++) 
2046                     {
2047                         if (catident1[k - 1] == lastSeq - firstSeq + 1) 
2048                         {
2049                             seq1[i] = ':'; // All residues member of the same category
2050                             break;
2051                         }
2052                     }
2053                     if(seq1[i] == ' ')
2054                     {
2055                         for(k = 1; k <= (int)weakGroup.size(); k++) 
2056                         {
2057                             if (catident2[k - 1] == lastSeq - firstSeq + 1) 
2058                             {
2059                                 seq1[i] = '.'; // All residues member of the same category
2060                                 break;
2061                             }
2062                         }
2063                     }
2064                 }
2065             }
2066
2067             sequenceLine = "";
2068             for(int index = pos; index < ptr + 1; index++)
2069             {
2070                 sequenceLine += seq1[index];
2071             }
2072
2073             //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++)
2074             for(k = 0; k < NAMESWIDTH + 6; k++)
2075             { 
2076                 (*clustalOutFile) << " ";
2077             }
2078             if(userParameters->getSeqRange()) 
2079             {
2080
2081                 (*clustalOutFile) << "          ";
2082             }
2083
2084             (*clustalOutFile) << sequenceLine << "\n";
2085         }
2086         clustalOutFile->close();
2087     }
2088     catch(const bad_alloc& e)
2089     {
2090         clustalOutFile->close();
2091         cerr << "A bad_alloc exception has occured in the clustalOut function.\n"
2092              << e.what() << "\n";
2093         exit(1);
2094     }
2095     catch(VectorOutOfRange e)
2096     {
2097         clustalOutFile->close();
2098         cerr << "An exception has occured in the clustalOut function.\n"
2099              << e.what() << "\n";
2100         exit(1);    
2101     }
2102     catch(exception e)
2103     {
2104         clustalOutFile->close();
2105         cerr << "An exception has occured in the clustalOut function.\n"
2106              << e.what() << "\n";
2107         exit(1);    
2108     }
2109     catch(...)
2110     {
2111         clustalOutFile->close();
2112         cerr << "An exception has occured in the clustalOut function.\n";
2113         exit(1);    
2114     }    
2115 }
2116
2117 /*
2118  * The function printSecStructMask takes in the 2 mask vectors. It makes changes to 
2119  * mask and puts the result in structMask. This is the one that will be printed out.
2120  * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no
2121  * way to do random access setting in vectors otherwise.
2122  */
2123 void AlignmentOutput::printSecStructMask(int prfLength, vector<char>* mask, 
2124                                          vector<char>* structMask)
2125 {
2126     int i, j;
2127 //    calculate the gap penalty mask from the secondary structures
2128
2129     i = 0;
2130     try
2131     {
2132         while (i < prfLength) 
2133         {
2134             if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') 
2135             {
2136                 for (j = 0; j < userParameters->getHelixEndMinus(); j++) 
2137                 {
2138                     if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a'
2139                                     && mask->at(i+j) != '$')) 
2140                     {    
2141                         break;
2142                     }
2143                     (*structMask)[i+j] = 'a';
2144                 }
2145                 i += j;
2146                 while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') 
2147                 {
2148                     if (i >= prfLength) 
2149                         break;
2150                     if (mask->at(i) == '$') 
2151                     {
2152                         (*structMask)[i] = 'A';
2153                         i++;
2154                         break;
2155                     }
2156                     else 
2157                         (*structMask)[i] = (*mask)[i];
2158                     i++;
2159                 }
2160                 for (j = 0; j < userParameters->getHelixEndMinus(); j++) 
2161                 {
2162                     if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a'
2163                                         || mask->at(i-j-1) == '$'))
2164                     {
2165                         (*structMask)[i - j - 1] = 'a';
2166                     }
2167                 }
2168             }
2169             else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') 
2170             {
2171                 for (j = 0; j < userParameters->getStrandEndMinus(); j++) 
2172                 {
2173                     if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b'
2174                                         && mask->at(i+j) != '%')) 
2175                     {
2176                         break;
2177                     }
2178                     (*structMask)[i+j] = 'b';
2179                 }
2180                 i += j;
2181                 while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') 
2182                 {
2183                     if (i >= prfLength) 
2184                         break;
2185                     if (mask->at(i) == '%') 
2186                     {
2187                         (*structMask)[i] = 'B';
2188                         i++;
2189                         break;
2190                     }
2191                     else
2192                     { 
2193                         (*structMask)[i] = mask->at(i);
2194                     }
2195                     i++;
2196                 }
2197                 for (j = 0; j < userParameters->getStrandEndMinus(); j++) 
2198                 {
2199                     if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' || 
2200                         mask->at(i-j-1) == '%'))
2201                     {
2202                         (*structMask)[i-j-1] = 'b';
2203                     }
2204                 }
2205             }
2206             else
2207             { 
2208                 i++;
2209             }
2210         }
2211     }
2212     catch(const exception& e)
2213     {
2214         // Catch all the exceptions
2215         cerr << "There has been an exception in the function printSecStructMask\n"
2216              << e.what() << "\n";
2217         exit(1);
2218     }
2219 }
2220
2221 /*
2222  * The function findRangeValues is used to find the range to be printed out.
2223  *
2224  *
2225  */
2226 void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes, 
2227                                 int len, int firstSeq)
2228 {
2229     assert(rnum);
2230     assert(firstRes > 0);
2231     assert(len > 0);
2232     assert(firstSeq > 0);
2233     
2234     try
2235     {
2236         int val;
2237         int i, ii;
2238         int j, slen;    
2239
2240         char tmpName[FILENAMELEN + 15];
2241         int iStart = 0;
2242         int iEnd = 0; // to print sequence start-end with names
2243         int found = 0;
2244         int ngaps = 0;
2245         int tmpStart = 0; 
2246         int tmpEnd = 0;
2247         int ntermgaps = 0;
2248         int pregaps = 0;
2249         int tmpk = 0;
2250         int isRange = 0;
2251         int formula = 0;
2252         const SeqArray* alignment = alignPtr->getSeqArray();
2253         
2254         tmpName[0] = '\0';
2255         slen = 0;
2256
2257         ii = firstSeq ;
2258         i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere! 
2259         string name = alignPtr->getName(i);
2260         
2261         if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3)) 
2262         {
2263             isRange = 1;
2264         }
2265         
2266         for(tmpk = 1; tmpk < firstRes; tmpk++)
2267         { 
2268             // do this irrespective of above sscanf
2269             //val = alignPtr.getResidue(i, tmpk);
2270             val = (*alignment)[i][tmpk]; // NOTE june29
2271             if ((val < 0) || (val > userParameters->getMaxAA()))
2272             { 
2273                 //it is gap
2274                 pregaps++;
2275             }
2276         }
2277         
2278         for(j = firstRes; j < firstRes + len; j++) 
2279         {
2280             if(j > alignPtr->getSeqLength(i))
2281             {
2282                 val = -3; // Cant get it so break out.
2283             }
2284             else
2285             {
2286                 //val = alignPtr.getResidue(i, j);
2287                 val = (*alignment)[i][j]; // NOTE june29
2288             }
2289             if((val == -3) || (val == 253))
2290             {
2291                 break;
2292             }
2293             else if((val < 0) || (val > userParameters->getMaxAA())) 
2294             {
2295                 ngaps++;
2296             }
2297             else 
2298             {
2299                 found = j;
2300             }
2301             if ( found && (iStart == 0) ) 
2302             {
2303                 iStart = found;
2304                 ntermgaps = ngaps;
2305             }
2306             slen++;
2307         }
2308         if(userParameters->getSeqRange()) 
2309         {
2310             cout << "Name : " << alignPtr->getName(i) << " "
2311                  << "\n  firstRes = "<< firstRes << " "
2312                  << "   len = " << len << " "
2313                  << "\n  iStart = " << iStart << " "
2314                  << "\n  tmpStart = " << tmpStart << " "
2315                  << "\n  ngaps = " << ngaps << " "
2316                  << "\n  pregaps = " << pregaps << " ";
2317             if (!isRange)
2318             {
2319                 formula = iStart - pregaps;
2320             }
2321             else
2322             {
2323                 formula = iStart - pregaps +  ( tmpStart == 1 ? 0: tmpStart-1) ;
2324             }
2325
2326             cout << "\n\nsuggestion  iStart - pregaps + tmpStart - ntermgaps = "
2327                  << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps
2328                  << " formula " << formula << " ";
2329         }
2330         else 
2331         {
2332             cerr << "\n no range found .... strange,  iStart = " << iStart;
2333             formula = 1;
2334         }
2335         if (pregaps == firstRes - 1) // all gaps -  now the conditions........ 
2336         {
2337             formula = tmpStart ; // keep the previous start... 
2338         }
2339         formula = (formula <= 0) ? 1: formula;
2340         if (pregaps == 0 && tmpStart == 0) 
2341         {
2342             formula = firstRes;
2343         }
2344         iEnd = formula + len - ngaps -1;
2345         rnum->start = formula;
2346         rnum->end = iEnd;
2347         cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start 
2348              << " - " << rnum->end;
2349         cout << " Done checking.........";
2350     }
2351     catch(VectorOutOfRange e)
2352     {
2353         cerr << "An exception has occured in the findRangeValues function.\n"
2354              << e.what() << "\n";
2355         exit(1);    
2356     }    
2357     catch(...)
2358     {
2359         cerr << "An exception has occured in findRangeValues function\n";
2360         exit(1);
2361     }
2362 }
2363
2364 string AlignmentOutput::nameonly(string s)
2365 {
2366     string tmp;
2367     try
2368     {
2369         int i = 0;
2370
2371         while (i < (int)s.size()) 
2372         {
2373             if(s.at(i) != '/')
2374             {
2375                 tmp += s.at(i);
2376                 i++;
2377             }
2378             else
2379             {
2380                 break;
2381             }
2382         }
2383     }
2384     catch(const exception& e)
2385     {
2386         cerr << "An exception has occured in the function nameonly\n"
2387              << e.what();
2388         exit(1);
2389     }
2390     return tmp;
2391 }
2392
2393 int AlignmentOutput::SeqGCGCheckSum(vector<char>* seq, int length)
2394 {
2395     int i;
2396     long check;
2397     int seqResIndex = 1;
2398     //int seqLength = seq->size();
2399     
2400     for (i = 0, check = 0; i < length; i++, seqResIndex++)
2401     {
2402         char _toUpperCase = (*seq)[seqResIndex];
2403         check += ((i % 57) + 1) * toupper(_toUpperCase);
2404     }
2405
2406     return (check % 10000);
2407 }
2408
2409 void AlignmentOutput::showAlign()
2410 {
2411     //FILE *file;
2412     int  numLines;
2413     char temp[MAXLINE + 1];
2414     temp[0] = '\0';
2415     string fileName;
2416     string answer;
2417     
2418     if(userParameters->getOutputClustal()) 
2419     {
2420         fileName = clustalOutName;
2421     }
2422     else if(userParameters->getOutputNbrf()) 
2423     {
2424         fileName = nbrfOutName;
2425     }
2426     else if(userParameters->getOutputGCG()) 
2427     {
2428         fileName = gcgOutName;
2429     }
2430     else if(userParameters->getOutputPhylip()) 
2431     {
2432         fileName = phylipOutName;
2433     }
2434     else if(userParameters->getOutputGde()) 
2435     {
2436         fileName = gdeOutName;
2437     }
2438     else if(userParameters->getOutputNexus()) 
2439     {
2440         fileName = nexusOutName;
2441     }
2442     else if(userParameters->getOutputFasta()) 
2443     {
2444         fileName = fastaOutName;
2445     }
2446     else
2447     {
2448         return; // No file output type!
2449     }
2450
2451     ifstream _fileIn;
2452     _fileIn.open(fileName.c_str(), ios::in);        
2453     _fileIn.seekg(0, std::ios::beg); // start at the beginning
2454     
2455     cout << "\n\n";
2456     numLines = 0;
2457
2458     while(_fileIn.getline(temp, MAXLINE + 1, '\n')) 
2459     {
2460        //fputs(temp,stdout);
2461        cout << temp << "\n";
2462        ++numLines;
2463        if(numLines >= PAGE_LEN) 
2464        {
2465            cout << "\n";
2466            utilityObject->getStr(string("Press [RETURN] to continue or  X  to stop"), answer);
2467            if(toupper(answer[0]) == 'X') 
2468            {
2469                _fileIn.close();
2470                return;
2471            }
2472            else
2473            {
2474                numLines = 0;
2475            }
2476        }
2477     }
2478     _fileIn.close();
2479     cout << "\n";
2480     utilityObject->getStr(string("Press [RETURN] to continue"), answer);
2481 }
2482
2483 }
2484