a0b9de3e5580f47f199f6adb769853e382309e11
[jabaws.git] / binaries / src / clustalw / src / alignment / AlignmentOutput.cpp
1 /**
2  * Author: Mark Larkin
3  * 
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
5  */
6 /**
7  * Output routines ported from clustalx C code in 'interface.c'.
8  *
9  * Changes:
10  *
11  * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention
12  * to define output columns, while the port uses (firsRes, lastRes). Fixed
13  * problems in all 7 output routines where the conventions were mixed giving
14  * over-long output blocks.
15  *
16  * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut
17  *
18  * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83
19  *                     added const NAMESWIDTH which is calculated based on the
20  *                     length of the longest sequence name and the upper and lower
21  *                     limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY
22  */
23
24 #ifdef HAVE_CONFIG_H
25     #include "config.h"
26 #endif
27 #include "AlignmentOutput.h"
28
29 namespace clustalw
30 {
31
32 AlignmentOutput::AlignmentOutput()
33 : clusSecStructOffset(9),
34   clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs
35 {
36     // NOTE in the old clustal these arrays ended with NULL as a value.
37     try
38     {
39         strongGroup.resize(9);
40         strongGroup[0] = string("STA");
41         strongGroup[1] = string("NEQK");
42         strongGroup[2] = string("NHQK");
43         strongGroup[3] = string("NDEQ");
44         strongGroup[4] = string("QHRK");
45         strongGroup[5] = string("MILV");
46         strongGroup[6] = string("MILF");
47         strongGroup[7] = string("HY");
48         strongGroup[8] = string("FYW");
49     
50         weakGroup.resize(11);
51         weakGroup[0] = string("CSA");
52         weakGroup[1] = string("ATV");
53         weakGroup[2] = string("SAG");
54         weakGroup[3] = string("STNK");
55         weakGroup[4] = string("STPA");
56         weakGroup[5] = string("SGND");
57         weakGroup[6] = string("SNDEQK");
58         weakGroup[7] = string("NDEQHK");
59         weakGroup[8] = string("NEQHRK");
60         weakGroup[9] = string("FVLIM");
61         weakGroup[10] = string("HFY");
62     }
63     catch(const exception& e)
64     {
65         cerr << "An exception has occured in the contructor of AlignmentOutput.\n"
66              << e.what() << "\n";
67         exit(1);    
68     }
69 }
70
71 /*
72  * The function createAlignmentOutput is used to call all the individual functions
73  * for the different file types. It is possible to output the alignment in all file
74  * types at the same time.
75  */
76 void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq)
77 {
78     int length;  
79     int firstRes; // starting sequence range - Ramu
80     int lastRes; // ending sequence range 
81     bool rangeOK;
82     
83     if((firstSeq <= 0) || (lastSeq < firstSeq))
84     {
85         utilityObject->error("Cannot produce alignment output."
86                 " Incorrect call to createAlignmentOutput."
87                 " firstSeq = %d"
88                 " lastSeq = %d\n", firstSeq, lastSeq);
89         return;
90     }
91     
92     length = 0;
93     firstRes = 1;
94     lastRes = 0;
95     rangeOK = false;
96     
97     try
98     {
99         length = alignPtr->getLengthLongestSequence();
100         lastRes = length;
101
102         if (userParameters->getRangeFromToSet()) 
103         {
104             firstRes = userParameters->getRangeFrom();
105             lastRes = userParameters->getRangeTo();
106             // Check if the numbers are ok.
107             if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1)) 
108             {
109                 cerr << "seqrange numbers are not set properly, using default....\n";
110                 firstRes = 1;
111                 lastRes = length;
112             }
113             else
114             {
115                 rangeOK = true;
116             }
117         }
118         if (rangeOK && (lastRes > length)) 
119         {
120             lastRes = length;
121             cout << "Seqrange " << lastRes << " is more than the " << length 
122                  << "  setting it to " << length << " \n";
123         }
124
125         if (userParameters->getMenuFlag())
126         {    
127             cout << "Consensus length = " << lastRes << " \n";
128         }
129
130         outputRegion partToOutput;
131         partToOutput._firstSeq = firstSeq;
132         partToOutput._lastSeq = lastSeq;
133         partToOutput._firstRes = firstRes;
134         partToOutput._lastRes = lastRes;
135
136         if(userParameters->getOutputClustal()) 
137         {
138             if(clustalOutFile.get() != 0 && clustalOutFile->is_open())
139             {
140                 clustalOut(alignPtr, partToOutput);
141                 if(clustalOutFile->is_open())
142                 {
143                     clustalOutFile->close();
144                 }
145                 utilityObject->info("CLUSTAL-Alignment file created  [%s]\n",
146                                     clustalOutName.c_str());
147             }
148         }
149         if(userParameters->getOutputNbrf())  
150         {
151             if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open())
152             {            
153                 nbrfOut(alignPtr, partToOutput);
154                 if(nbrfOutFile->is_open())
155                 {
156                     nbrfOutFile->close();
157                 }
158                 utilityObject->info("NBRF/PIR-Alignment file created  [%s]\n",
159                                     nbrfOutName.c_str());                
160             }
161         }
162         if(userParameters->getOutputGCG())  
163         {
164             if(gcgOutFile.get() != 0 && gcgOutFile->is_open())
165             {
166                 gcgOut(alignPtr, partToOutput);
167                 if(gcgOutFile->is_open())
168                 {
169                     gcgOutFile->close();
170                 }
171                 utilityObject->info("GCG-Alignment file created      [%s]\n",
172                                     gcgOutName.c_str());                
173             }
174         }
175         if(userParameters->getOutputPhylip())  
176         {
177             if(phylipOutFile.get() != 0 && phylipOutFile->is_open())
178             {
179                 phylipOut(alignPtr, partToOutput);
180                 if(phylipOutFile->is_open())
181                 {
182                     phylipOutFile->close();
183                 }
184                 utilityObject->info("PHYLIP-Alignment file created   [%s]\n",
185                                     phylipOutName.c_str());                
186             }
187         }
188         
189         if(userParameters->getOutputGde())  
190         {
191             if(gdeOutFile.get() != 0 && gdeOutFile->is_open())
192             {
193                 gdeOut(alignPtr, partToOutput);
194                 if(gdeOutFile->is_open())
195                 {
196                     gdeOutFile->close();
197                 }
198                 utilityObject->info("GDE-Alignment file created      [%s]\n",
199                                     gdeOutName.c_str());                
200             }
201         }
202         
203         if(userParameters->getOutputNexus())  
204         {
205             if(nexusOutFile.get() != 0 && nexusOutFile->is_open())
206             {
207                 nexusOut(alignPtr, partToOutput);
208                 if(nexusOutFile->is_open())
209                 {
210                     nexusOutFile->close();
211                 }
212                 utilityObject->info("NEXUS-Alignment file created    [%s]\n",
213                                     nexusOutName.c_str());                
214             }
215         }
216
217         if(userParameters->getOutputFasta())  
218         {
219             if(fastaOutFile.get() != 0 && fastaOutFile->is_open())
220             {
221                 fastaOut(alignPtr, partToOutput);
222                 if(fastaOutFile->is_open())
223                 {
224                     fastaOutFile->close();
225                 }
226                 utilityObject->info("Fasta-Alignment file created    [%s]\n",
227                                     fastaOutName.c_str());                
228             }
229         }
230         
231         // Output the alignment to the screen if it is required.
232         if (userParameters->getShowAlign() && userParameters->getMenuFlag()) 
233         {
234             showAlign();
235         }
236     }
237     catch(const exception& e)
238     {
239         cerr << "An exception has occured in the createAlignmentOutput function.\n"
240              << e.what() << "\n";
241         exit(1);    
242     }
243 }
244
245 bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames)
246 {
247     if(!userParameters->getOutputClustal() && 
248        !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
249        !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
250        !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) 
251     {
252          utilityObject->error("You must select an alignment output format\n");
253          return false;
254     }
255         
256     if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == ""
257        && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == ""
258        && fileNames.phylipFile == "")
259     {
260         utilityObject->error("No names for output files. Cannot output alignment.\n");
261         return false;
262     }
263     
264     if(fileNames.clustalFile != "")
265     {
266         clustalOutName = fileNames.clustalFile;
267         if(!openExplicitFile(clustalOutFile, clustalOutName))
268         { 
269             return false;
270         }
271     }  
272     if(fileNames.fastaFile != "")
273     {
274         fastaOutName = fileNames.fastaFile;
275         if(!openExplicitFile(fastaOutFile, fastaOutName))
276         { 
277             return false;
278         }
279     }
280     if(fileNames.gcgFile != "")
281     {
282         gcgOutName = fileNames.gcgFile;
283         if(!openExplicitFile(gcgOutFile, gcgOutName))
284         { 
285             return false;
286         }
287     }
288     if(fileNames.gdeFile != "")
289     {
290         gdeOutName = fileNames.gdeFile;
291         if(!openExplicitFile(gdeOutFile, gdeOutName))
292         { 
293             return false;
294         }
295     }
296     if(fileNames.nexusFile != "")
297     {
298         nexusOutName = fileNames.nexusFile;
299         if(!openExplicitFile(nexusOutFile, nexusOutName))
300         { 
301             return false;
302         }
303     }
304     if(fileNames.nrbfFile != "")
305     {
306         nbrfOutName = fileNames.nrbfFile;
307         if(!openExplicitFile(nbrfOutFile, nbrfOutName))
308         { 
309             return false;
310         }
311     }
312     if(fileNames.phylipFile != "")
313     {
314         phylipOutName = fileNames.phylipFile;
315         if(!openExplicitFile(phylipOutFile, phylipOutName))
316         { 
317             return false;
318         }
319     } 
320     return true;                         
321 }
322 /*
323  * The function openAlignmentOutput opens a file for output. It returns true if it
324  * has been successful and false if it has not been successful
325  */
326 bool AlignmentOutput::openAlignmentOutput(string path)
327 {
328     if(!userParameters->getOutputClustal() && 
329        !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() &&
330        !userParameters->getOutputPhylip() && !userParameters->getOutputGde() &&
331        !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) 
332     {
333          utilityObject->error("You must select an alignment output format\n");
334          return false;
335     }
336     string _fileNameToOutput = path;
337     if(_fileNameToOutput == "")
338     {
339       /*_fileNameToOutput = userParameters->getSeqName();*/
340       /* BUG 166 , file extension, FS, 2009-01-26 */
341       utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput);
342     }
343     
344     if(userParameters->getOutputClustal()) 
345     {
346         if (userParameters->getOutfileName() != "") 
347         {
348             clustalOutName = userParameters->getOutfileName();
349             if(!openExplicitFile(clustalOutFile, clustalOutName))
350             { 
351                 return false;
352             }
353         }
354         else 
355         {
356             clustalOutName = openOutputFile(clustalOutFile, 
357                              "\nEnter a name for the CLUSTAL output file ",
358                              _fileNameToOutput, "aln");
359                              
360             if(clustalOutName == "")
361             {
362                 return false; // We have not been successful.
363             }
364         }
365     }
366
367     if(userParameters->getOutputNbrf()) 
368     {
369         if (userParameters->getOutfileName() != "") 
370         {
371             nbrfOutName = userParameters->getOutfileName();
372             if(!openExplicitFile(nbrfOutFile, nbrfOutName))
373             { 
374                 return false;
375             }            
376         }
377         else
378         {
379             nbrfOutName = openOutputFile(nbrfOutFile,
380                       "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput,
381                        "pir");
382                       
383             if(nbrfOutName == "")
384             {
385                 return false; // We have not been successful.
386             }
387         }
388     }
389
390     if(userParameters->getOutputGCG()) 
391     {
392         if (userParameters->getOutfileName() != "") 
393         {
394             gcgOutName = userParameters->getOutfileName();
395             if(!openExplicitFile(gcgOutFile, gcgOutName))
396             { 
397                 return false;
398             }   
399         }
400         else
401         {
402             gcgOutName = openOutputFile(gcgOutFile,
403                       "\nEnter a name for the GCG output file", _fileNameToOutput, "msf");
404                       
405             if(gcgOutName == "")
406             {
407                 return false; // We have not been successful.
408             }
409         }
410     }
411
412     if(userParameters->getOutputPhylip()) 
413     {
414         if (userParameters->getOutfileName() != "") 
415         {
416             phylipOutName = userParameters->getOutfileName();
417             if(!openExplicitFile(phylipOutFile, phylipOutName))
418             { 
419                 return false;
420             }
421         }
422         else
423         {
424             phylipOutName = openOutputFile(phylipOutFile,
425                       "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy");
426                       
427             if(phylipOutName == "")
428             {
429                 return false; // We have not been successful.
430             }
431         }
432     }
433
434     if(userParameters->getOutputGde()) 
435     {
436         if (userParameters->getOutfileName() != "") 
437         {
438             gdeOutName = userParameters->getOutfileName();
439             if(!openExplicitFile(gdeOutFile, gdeOutName))
440             { 
441                 return false;
442             }
443         }
444         else
445         {
446             gdeOutName = openOutputFile(gdeOutFile, 
447                              "\nEnter a name for the GDE output file     ",
448                              _fileNameToOutput, "gde");
449                  
450             if(gdeOutName == "")
451             {
452                 return false; // We have not been successful.
453             }                       
454         }
455     }
456
457     if(userParameters->getOutputNexus()) 
458     {
459         if (userParameters->getOutfileName() != "") 
460         {
461             nexusOutName = userParameters->getOutfileName();
462             if(!openExplicitFile(nexusOutFile, nexusOutName))
463             { 
464                 return false;
465             }
466         }
467         else
468         {
469             nexusOutName = openOutputFile(nexusOutFile, 
470                              "\nEnter a name for the NEXUS output file   ",
471                              _fileNameToOutput, "nxs");
472                  
473             if(nexusOutName == "")
474             {
475                 return false; // We have not been successful.
476             }                       
477         }
478     }
479    
480     if(userParameters->getOutputFasta()) 
481     {
482         if (userParameters->getOutfileName() != "") 
483         {
484             fastaOutName = userParameters->getOutfileName();
485             if(!openExplicitFile(fastaOutFile, fastaOutName))
486             { 
487                 return false;
488             }
489         }
490         else
491         {
492             fastaOutName = openOutputFile(fastaOutFile, 
493                              "\nEnter a name for the Fasta output file ",
494                              _fileNameToOutput, "fasta");
495                  
496             if(fastaOutName == "")
497             {
498                 return false; // We have not been successful.
499             }
500         }
501     }
502   
503     return true;
504
505 }
506
507
508 /*
509  * The function openExplicitFile is used to open a file when we have the name already.
510  * It returns true if the file has been opened correctly, false otherwise.
511  */
512 bool AlignmentOutput::openExplicitFile(auto_ptr<ofstream>& outFile, string fileName)
513 {
514     if (fileName == "") 
515     {
516         cerr << "Bad output file [" << fileName << "]\n";
517         utilityObject->error("Bad output file [%s]\n", fileName.c_str());
518         return false;
519     }
520     
521     outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc));
522     if(!outFile->is_open()) 
523     {
524         utilityObject->error("Cannot open output file [%s]\n", fileName.c_str());
525         return false;
526     }
527     return true;
528 }
529
530 /*
531  * The function openOutputFile is used to open a file when we dont have the name yet.
532  * This function returns the name if it has been successful. If it has not been 
533  * successful it returns a blank string ""
534  */
535 string AlignmentOutput::openOutputFile(auto_ptr<ofstream>& outFile, string prompt, 
536                                        string path, string fileExtension)
537 {
538     string temp;
539     string _fileName; // Will return this name.
540     string message;
541     _fileName = path + fileExtension;
542
543     if(_fileName.compare(userParameters->getSeqName()) == 0) 
544     {
545         cout << "Output file name is the same as input file.\n";
546         if (userParameters->getMenuFlag()) 
547         {
548             message = "\n\nEnter new name to avoid overwriting  [" + _fileName + "]";
549
550             utilityObject->getStr(message, temp);
551             if(temp != "")
552             {
553                 _fileName = temp;
554             }
555         }
556     }
557     else if (userParameters->getMenuFlag()) 
558     {
559
560         message = prompt + " [" + _fileName + "]";
561         utilityObject->getStr(message, temp);
562         if(temp != "")
563         {
564             _fileName = temp;
565         }
566     }
567
568     outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc));
569     if(!outFile->is_open()) 
570     {
571         utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str());
572         return "";
573     }    
574     return _fileName;
575 }
576
577 /*
578  * fastaOut: output the alignment in FASTA format
579  */
580 void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput)
581 {
582     char residue;
583     int val;
584     int i, ii;
585     int j, slen;    
586     int lineLength;
587     int firstRes = partToOutput._firstRes;
588     int lastRes = partToOutput._lastRes;
589     int firstSeq = partToOutput._firstSeq;
590     int lastSeq = partToOutput._lastSeq;
591     rangeNum rnum;  
592     cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n";
593     try
594     {
595         const SeqArray* alignment = alignPtr->getSeqArray(); //NOTE june29
596         vector<char> sequence;
597         sequence.assign(lastRes + 1, '0');     
598     
599         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
600                 
601         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
602         if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
603         {    
604             lineLength = LINELENGTH;
605         }
606
607
608         for(ii = firstSeq; ii <= lastSeq; ii++) 
609         {
610             i = alignPtr->getOutputIndex(ii - 1);
611             slen = 0;
612             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
613             for(j = firstRes; j <= lastRes; j++) //- nige
614             {
615                 if (j <= alignPtr->getSeqLength(i))
616                 {
617                     //val = alignPtr.getResidue(i, j);
618                     val = (*alignment)[i][j]; // NOTE june29
619                 }
620                 else
621                 { 
622                     val = -3;
623                 }
624                 if((val == -3) || (val == 253))
625                 {
626                     break;
627                 }
628                 else if((val < 0) || (val > userParameters->getMaxAA())) 
629                 {
630                     residue = '-';
631                 }
632                 else 
633                 {
634                     residue = userParameters->getAminoAcidCode(val);
635                 }
636                 if (userParameters->getLowercase())
637                 {    
638                     sequence[j - firstRes] = residue;
639                 }
640                 else
641                 {
642                     sequence[j - firstRes] = residue;
643                 }
644                 slen++;
645             }
646             (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i));
647             if(userParameters->getSeqRange()) 
648             {
649                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
650                 (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end;
651                 
652             }
653             (*fastaOutFile) << "\n";
654             for(j = 1; j <= slen; j++) 
655             {
656                 (*fastaOutFile) << sequence[j-1];
657                 if((j % lineLength == 0) || (j == slen))
658                 { 
659                     (*fastaOutFile) << "\n";
660                 }
661             }
662         }
663         fastaOutFile->close();
664         cout << "FASTA file created!\n";
665     }
666     catch(const bad_alloc& e)
667     {
668         fastaOutFile->close();
669         cerr << "A bad_alloc exception has occured in the fastaOut function.\n"
670              << e.what() << "\n";
671         exit(1);
672     }
673     catch(VectorOutOfRange e)
674     {
675         fastaOutFile->close();
676         cerr << "An exception has occured in the fastaOut function.\n"
677              << e.what() << "\n";
678         exit(1);    
679     }
680     catch(...)
681     {
682         fastaOutFile->close();
683         cerr << "An exception has occured in the fastaOut function.\n";
684         exit(1);    
685     }
686
687 }
688
689 /* 
690  * gcgOut: output the alignment in gcg file format
691  */
692 void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput)
693 {
694     char residue;
695     int val;
696     int i, ii, chunks, block;
697     int j, k, pos1, pos2;    
698     long grandChecksum;
699  
700     try
701     {  
702         int firstRes = partToOutput._firstRes;
703         int lastRes = partToOutput._lastRes;
704         int firstSeq = partToOutput._firstSeq;
705         int lastSeq = partToOutput._lastSeq;    
706         rangeNum rnum;
707         vector<char> sequence;
708         vector<int> allChecks;
709         int _maxLength = alignPtr->getMaxAlnLength();
710         sequence.assign(_maxLength + 1, '0');
711         allChecks.assign(lastSeq + 1, 0);
712         const SeqArray* alignment = alignPtr->getSeqArray();
713         
714         for(i = firstSeq; i <= lastSeq; i++) 
715         {
716             //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige
717             for(j = firstRes; j <= lastRes; j++) //- nige
718             {
719                 if (j <= alignPtr->getSeqLength(i))
720                 {
721                     //val = alignPtr.getResidue(i, j);
722                     val = (*alignment)[i][j]; // NOTE june29
723                 }
724                 else
725                 { 
726                     val = -3;
727                 }
728                 if((val == -3) || (val == 253)) 
729                 {
730                     break;
731                 }
732                 else if((val < 0) || (val > userParameters->getMaxAA()))
733                 {
734                     residue = '.';
735                 }
736                 else 
737                 {
738                     residue = userParameters->getAminoAcidCode(val);
739                 }
740                 sequence[j - firstRes + 1] = residue;
741             }
742             // pad any short sequences with gaps, to make all sequences the same length 
743             for(; j <= firstRes + lastRes - 1; j++)
744             { 
745                 sequence[j - firstRes + 1] = '.';
746             }
747             allChecks[i] = SeqGCGCheckSum(&sequence, lastRes);
748         }    
749         int _index;
750         grandChecksum = 0;
751         for(i = 1; i <= alignPtr->getNumSeqs(); i++)
752         {
753             _index = alignPtr->getOutputIndex(i - 1);
754             grandChecksum += allChecks[_index];
755         }
756         grandChecksum = grandChecksum % 10000;
757         
758         (*gcgOutFile) << "PileUp\n\n";
759         (*gcgOutFile) << "\n\n   MSF:" << setw(5) << lastRes << "  Type: ";
760                         
761         if(userParameters->getDNAFlag())
762         {
763             (*gcgOutFile) << "N";
764         }
765         else
766         {
767             (*gcgOutFile) << "P";
768         }
769
770         (*gcgOutFile) << "    Check:" << setw(6) << grandChecksum << "   .. \n\n";
771         float _seqWeight;
772
773         int length = lastRes - firstRes + 1; //- nige
774
775         for(ii = firstSeq; ii <= lastSeq; ii++)  
776         {
777             i = alignPtr->getOutputIndex(ii - 1);
778             _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR;
779
780             (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo  Len:"
781                         //<< setw(5) << lastRes << "  Check:" << setw(6) << allChecks[i]  //- nige
782                           << setw(5) << length << "  Check:" << setw(6) << allChecks[i] 
783                           << "  Weight:  " << fixed << setprecision(1) << _seqWeight << "\n";
784         }  
785         (*gcgOutFile) << "\n//\n";
786         
787         chunks = length / GCG_LINELENGTH; //- nige
788         if(length % GCG_LINELENGTH != 0)  //- nige
789         {
790             ++chunks;
791         }
792   
793         for(block = 1; block <= chunks; block++) 
794         {
795             (*gcgOutFile) << "\n\n";
796             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
797             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige
798             
799             for(ii = firstSeq; ii <= lastSeq; ii++) 
800             {
801                 i = alignPtr->getOutputIndex(ii - 1);
802                 if (!userParameters->getSeqRange()) 
803                 {
804                     (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left
805                                   << alignPtr->getName(i) << " ";
806                 }
807                 else 
808                 {
809                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
810                     std::stringstream ss;
811                     std::stringstream ss2;
812                     string rangeStart;
813                     string rangeEnd;
814                     ss << rnum.start;
815                     ss >> rangeStart;
816                     ss2 << rnum.end;
817                     ss2 >> rangeEnd;
818                     string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart;
819                     nameAndRange += "-" + rangeEnd;
820                     (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left
821                                   << nameAndRange;
822                 }
823                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
824                 {
825             
826                     //JULIE -
827                     //check for int sequences - pad out with '.' characters to end 
828             
829                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
830                     {
831                         //val = alignPtr.getResidue(i, j + firstRes - 1);
832                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
833                     }
834                     else
835                     {
836                         val = -3;
837                     }
838                     if((val == -3) || (val == 253))
839                     {
840                         residue = '.';
841                     }
842                     else if((val < 0) || (val > userParameters->getMaxAA()))
843                     {
844                         residue = '.';
845                     }
846                     else 
847                     {
848                         residue = userParameters->getAminoAcidCode(val);
849                     }
850                     (*gcgOutFile) << residue;
851                     if(j % 10 == 0)
852                     { 
853                         (*gcgOutFile) << " ";
854                     }
855                 }
856             }
857         }
858         (*gcgOutFile) << "\n\n";
859         gcgOutFile->close();   
860     }
861     catch(const bad_alloc& e)
862     {
863         gcgOutFile->close();
864         cerr << "A bad_alloc exception has occured in the gcgOut function.\n"
865              << e.what() << "\n";
866         exit(1);
867     }
868     catch(VectorOutOfRange e)
869     {
870         gcgOutFile->close();
871         cerr << "An exception has occured in the gcgOut function.\n"
872              << e.what() << "\n";
873         exit(1);    
874     }
875     catch(...)
876     {
877         gcgOutFile->close();
878         cerr << "An exception has occured in the gcgOut function.\n";
879         exit(1);    
880     }    
881 }
882
883 void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput)
884 {
885     int firstRes = partToOutput._firstRes;
886     int lastRes = partToOutput._lastRes;
887     int firstSeq = partToOutput._firstSeq;
888     int lastSeq = partToOutput._lastSeq;    
889     try
890     {
891         char residue;
892         int val;
893         int i, ii, chunks, block;    
894         int j, k, pos1, pos2;    
895         int nameLen;
896         bool warn;
897         vector<string> _seqNames;
898         const SeqArray* alignment = alignPtr->getSeqArray();
899         rangeNum rnum;
900
901         // Push on a blank string. This is because the index starts at 1 for the seqs etc..
902         _seqNames.push_back("");
903
904         nameLen = 0;
905     
906         for(i = firstSeq; i <= lastSeq; i++)  
907         {
908             _seqNames.push_back(alignPtr->getName(i));
909             ii = _seqNames.size();
910             if(nameLen < ii) 
911             {
912                 nameLen = ii;
913             }
914         }
915         if(nameLen > 10) 
916         {
917             warn = false;
918             for(i = 0; i < (int)_seqNames.size() - 1; i++)  
919             {
920                 for(j = i + 1; j < (int)_seqNames.size(); j++) 
921                 {
922                     if (_seqNames[i].compare(_seqNames[j]) == 0) 
923                     {
924                         warn = true;
925                     }
926                 }
927             }
928             if(warn)
929             {
930                 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous.");
931             }
932             else
933             {
934                 utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.");
935             }
936         }
937   
938         int length = lastRes - firstRes + 1; //- nige
939
940         chunks = length / GCG_LINELENGTH; //- nige
941         if(length % GCG_LINELENGTH != 0)  //- nige
942         {
943             ++chunks;
944         }
945   
946         (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " "
947                          << setw(6) << length; //-nige
948         
949         for(block = 1; block <= chunks; block++) 
950         {
951             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
952             //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige
953             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;
954             for(ii = firstSeq; ii <= lastSeq; ii++)  
955             {
956                 i = alignPtr->getOutputIndex(ii - 1);
957                 if(block == 1)  
958                 {
959                     if(!userParameters->getSeqRange()) 
960                     {
961                         string name = alignPtr->getName(i);
962                         (*phylipOutFile) << "\n" << setw(10) << left 
963                                              << name.substr(0,10) << " ";
964                     }
965                     else
966                     {
967                         findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
968                         std::stringstream ss;
969                         std::stringstream ss2;
970                         string rangeStart;
971                         string rangeEnd;
972                         ss << rnum.start;
973                         ss >> rangeStart;
974                         ss2 << rnum.end;
975                         ss2 >> rangeEnd;
976                         string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
977                         nameAndRange += rangeStart + "-" + rangeEnd;
978                         (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
979                                          << left
980                                          << nameAndRange;
981                     }
982                 }
983                 else
984                 {
985                     (*phylipOutFile) << "\n           ";
986                 }
987                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
988                 {
989                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
990                     {
991                         //val = alignPtr.getResidue(i, j + firstRes - 1);
992                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
993                     }
994                     else
995                     {
996                         val = -3;
997                     }
998                     if((val == -3) || (val == 253))
999                     {
1000                         break;
1001                     }
1002                     else if((val < 0) || (val > userParameters->getMaxAA()))
1003                     {
1004                         residue = '-';
1005                     }
1006                     else 
1007                     {
1008                         residue = userParameters->getAminoAcidCode(val);
1009                     }
1010                     (*phylipOutFile) << residue;
1011                     if(j % 10 == 0) 
1012                     {
1013                         (*phylipOutFile) << " ";
1014                     }
1015                 }
1016             }
1017             (*phylipOutFile) << "\n";
1018         }  
1019     }
1020     catch(const bad_alloc& e)
1021     {
1022         phylipOutFile->close();
1023         cerr << "A bad_alloc exception has occured in the phylipOut function.\n"
1024              << e.what() << "\n";
1025         exit(1);
1026     }
1027     catch(VectorOutOfRange e)
1028     {
1029         phylipOutFile->close();
1030         cerr << "An exception has occured in the phylipOut function.\n"
1031              << e.what() << "\n";
1032         exit(1);    
1033     }
1034     catch(...)
1035     {
1036         phylipOutFile->close();
1037         cerr << "An exception has occured in the phylipOut function.\n";
1038         exit(1);    
1039     }      
1040 }
1041
1042 void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput)
1043 {
1044     int firstRes = partToOutput._firstRes;
1045     int lastRes = partToOutput._lastRes;
1046     int firstSeq = partToOutput._firstSeq;
1047     int lastSeq = partToOutput._lastSeq;    
1048     try
1049     {
1050         char residue;
1051         int val;
1052         int i, ii, chunks, block;    
1053         int j, k, pos1, pos2;    
1054         const SeqArray* alignment = alignPtr->getSeqArray();
1055         rangeNum rnum;
1056
1057         int length = lastRes - firstRes + 1; //- nige
1058
1059         chunks = length / GCG_LINELENGTH; //- nige
1060         if(length % GCG_LINELENGTH != 0) //- nige
1061         {
1062             ++chunks;
1063         }
1064   
1065         (*nexusOutFile) << "#NEXUS\n";
1066         (*nexusOutFile) << "BEGIN DATA;\n";
1067         (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs() 
1068                         << " nchar=" << length << ";\n"; //- nige
1069         (*nexusOutFile) << "format missing=?\n";
1070         (*nexusOutFile) << "symbols=\"";
1071         
1072         for(i = 0; i <= userParameters->getMaxAA(); i++)
1073         {
1074             (*nexusOutFile) << userParameters->getAminoAcidCode(i);
1075         }
1076         (*nexusOutFile) << "\"\n";
1077         (*nexusOutFile) << "interleave datatype=";
1078         bool _dnaFlag = userParameters->getDNAFlag();
1079         string _type = _dnaFlag ? "DNA " : "PROTEIN ";
1080         (*nexusOutFile) << _type;
1081         (*nexusOutFile) << "gap= -;\n";
1082         (*nexusOutFile) << "\nmatrix";
1083   
1084         for(block = 1; block <= chunks; block++) 
1085         {
1086             pos1 = ((block - 1) * GCG_LINELENGTH) + 1;
1087             pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; //- nige
1088             for(ii = firstSeq; ii <= lastSeq; ii++)  
1089             {
1090                 i = alignPtr->getOutputIndex(ii - 1);
1091                 if (!userParameters->getSeqRange()) 
1092                 {
1093                     (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left
1094                                     << alignPtr->getName(i) << " ";
1095                 }
1096                 else 
1097                 {
1098                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1099                     
1100                     std::stringstream ss;
1101                     std::stringstream ss2;
1102                     string rangeStart;
1103                     string rangeEnd;
1104                     ss << rnum.start;
1105                     ss >> rangeStart;
1106                     ss2 << rnum.end;
1107                     ss2 >> rangeEnd;
1108                     string nameAndRange = nameonly(alignPtr->getName(i)) + "/";
1109                     nameAndRange += rangeStart + "-" + rangeEnd;
1110                     (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15)
1111                                     << left
1112                                     << nameAndRange;                    
1113                 }
1114                 for(j = pos1, k = 1; j <= pos2; j++, k++) 
1115                 {
1116                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1117                     {
1118                         //val = alignPtr.getResidue(i, j + firstRes - 1);
1119                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1120                     }
1121                     else
1122                     {
1123                         val = -3;
1124                     }
1125                     if((val == -3) || (val == 253))
1126                     {
1127                         break;
1128                     }
1129                     else if((val < 0) || (val > userParameters->getMaxAA()))
1130                     {
1131                         residue = '-';
1132                     }
1133                     else 
1134                     {
1135                         residue = userParameters->getAminoAcidCode(val);
1136                     }
1137                     (*nexusOutFile) << residue;
1138                 }
1139             }
1140             (*nexusOutFile) << "\n";
1141         }
1142         (*nexusOutFile) << ";\nend;\n";   
1143     }
1144     catch(const bad_alloc& e)
1145     {
1146         nexusOutFile->close();
1147         cerr << "A bad_alloc exception has occured in the nexusOut function.\n"
1148              << e.what() << "\n";
1149         exit(1);
1150     }
1151     catch(VectorOutOfRange e)
1152     {
1153         nexusOutFile->close();
1154         cerr << "An exception has occured in the nexusOut function.\n"
1155              << e.what() << "\n";
1156         exit(1);    
1157     }
1158     catch(...)
1159     {
1160         nexusOutFile->close();
1161         cerr << "An exception has occured in the nexusOut function.\n";
1162         exit(1);    
1163     }      
1164 }
1165
1166 /*
1167  * gdeOut: print out the alignment in gde file format.
1168  */
1169 void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput)
1170 {
1171     int firstRes = partToOutput._firstRes;
1172     int lastRes = partToOutput._lastRes;
1173     int firstSeq = partToOutput._firstSeq;
1174     int lastSeq = partToOutput._lastSeq;    
1175
1176     int length = lastRes - firstRes + 1; //- nige
1177
1178     try
1179     {
1180         char residue;
1181         int val;
1182         vector<char> _ssMask1, _ssMask2, seq;
1183         int i, ii;
1184         int j, slen;    
1185         int lineLength;
1186         const SeqArray* alignment = alignPtr->getSeqArray();
1187         rangeNum rnum;
1188
1189         seq.assign(alignPtr->getMaxAlnLength() + 1, '0');
1190         
1191         // decide the line length for this alignment - maximum is LINELENGTH 
1192         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1193         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1194         if (lineLength > LINELENGTH || lineLength <= 0) 
1195         {
1196             lineLength = LINELENGTH;
1197         }
1198   
1199         // If we are using the secondary structure info from profile 1, set it up
1200         if (userParameters->getStructPenalties1() == SECST && 
1201             userParameters->getUseSS1() == true) 
1202         {
1203             int _lengthSeq1 = alignPtr->getSeqLength(1);
1204             vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1205             _ssMask1.assign(_lengthSeq1 + 10, 0);
1206             
1207             for (i = 0;i < _lengthSeq1;i++)
1208             {
1209                 _ssMask1[i] = _secStructMask1->at(i);
1210             }
1211             printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1);
1212         }
1213         
1214         // If we are using the secondary structure info from profile 2, set it up
1215         if (userParameters->getStructPenalties2() == SECST && 
1216             userParameters->getUseSS2() == true) 
1217         {
1218             int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1219             int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1220             _ssMask2.assign(lengthSeqProfile2 + 10, 0);
1221             vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1222             
1223             for (i=0;i < lengthSeqProfile2;i++)
1224             {
1225                 _ssMask2[i] = _secStructMask2->at(i);
1226             }
1227             printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2);  
1228         }
1229
1230         bool _dnaFlag = userParameters->getDNAFlag();
1231         string _prefix = _dnaFlag ? "#" : "%";
1232         
1233         for(ii = firstSeq; ii <= lastSeq; ii++) 
1234         {
1235             i = alignPtr->getOutputIndex(ii - 1);
1236             
1237             (*gdeOutFile) << _prefix;
1238             if(!userParameters->getSeqRange()) 
1239             {
1240                 (*gdeOutFile) << alignPtr->getName(i) << "\n";
1241             }
1242             else 
1243             {
1244                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1245                 (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/"
1246                               << rnum.start << "-" << rnum.end << "\n";
1247             }
1248             slen = 0;
1249
1250             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1251             for(j = firstRes; j <= lastRes; j++) //- nige
1252             {
1253                 if (j <= alignPtr->getSeqLength(i))
1254                 {
1255                     //val = alignPtr.getResidue(i, j);
1256                     val = (*alignment)[i][j]; // NOTE june29
1257                 }
1258                 else
1259                 { 
1260                     val = -3;
1261                 }
1262                 if((val == -3) || (val == 253))
1263                 {
1264                     break;
1265                 }
1266                 else if((val < 0) || (val > userParameters->getMaxAA()))
1267                 {
1268                     residue = '-';
1269                 }
1270                 else 
1271                 {
1272                     residue = userParameters->getAminoAcidCode(val);
1273                 }
1274                 if (userParameters->getLowercase())
1275                 {
1276                     seq[j - firstRes] = (char)tolower((int)residue);
1277                 }
1278                 else
1279                 {
1280                     seq[j - firstRes] = residue;
1281                 }
1282                 slen++;
1283             }
1284             for(j = 1; j <= slen; j++) 
1285             {
1286                 (*gdeOutFile) << seq[j-1];
1287                 if((j % lineLength == 0) || (j == slen)) 
1288                 {
1289                     (*gdeOutFile) << "\n";
1290                 }
1291             }
1292         }
1293   
1294         if (userParameters->getOutputStructPenalties() == 0 || 
1295             userParameters->getOutputStructPenalties() == 2) 
1296         {
1297             if (userParameters->getStructPenalties1() == SECST && 
1298                 userParameters->getUseSS1() == true) 
1299             {
1300                 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1301                               << alignPtr->getSecStructName1() << "\n";
1302                               
1303                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1304                 for(i = firstRes; i <= lastRes; i++) //- nige
1305                 {
1306                     val = _ssMask1[i - 1];
1307                     if (val == userParameters->getGapPos1() || 
1308                         val == userParameters->getGapPos2())
1309                     {
1310                         seq[i - firstRes] = '-';
1311                     }
1312                     else
1313                     {
1314                         seq[i - firstRes] = val;
1315                     }
1316                 }
1317
1318                 for(i = 1; i <= lastRes; i++) 
1319                 {
1320                     (*gdeOutFile) << seq[i-1];
1321                     if((i % lineLength == 0) || (i == lastRes)) 
1322                     {
1323                         (*gdeOutFile) << "\n";
1324                     }
1325                 }
1326             }
1327     
1328             if (userParameters->getStructPenalties2() == SECST && 
1329                 userParameters->getUseSS2() == true) 
1330             {
1331                 (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left
1332                               << alignPtr->getSecStructName2() << "\n";
1333                                               
1334                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1335                 for(i = firstRes; i <= lastRes; i++) //- nige
1336                 {
1337                     val = _ssMask2[i - 1];
1338                     if (val == userParameters->getGapPos1() || 
1339                         val == userParameters->getGapPos2())
1340                     {
1341                         seq[i - firstRes] = '-';
1342                     }
1343                     else
1344                     {
1345                         seq[i - firstRes] = val;
1346                     }
1347                 }
1348
1349                 //for(i = 1; i <= lastRes; i++) //- nige
1350                 for(i = 1; i <= length; i++) //- nige
1351                 {
1352                     (*gdeOutFile) << seq[i - 1];
1353                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1354                     if((i % lineLength == 0) || (i == length)) //- nige
1355                     {
1356                         (*gdeOutFile) << "\n";
1357                     }
1358                 }
1359             }
1360         }
1361         if (userParameters->getOutputStructPenalties() == 1 || 
1362             userParameters->getOutputStructPenalties() == 2) 
1363         {
1364             if (userParameters->getStructPenalties1() != NONE && 
1365                 userParameters->getUseSS1() == true) 
1366             {
1367                 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1368                               << alignPtr->getSecStructName1() << "\n";
1369                 
1370                 vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();          
1371                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1372                 for(i = firstRes; i <= lastRes; i++) //- nige
1373                 {
1374                     val = _gapPenaltyMask1->at(i - 1);
1375                     if (val == userParameters->getGapPos1() || 
1376                         val == userParameters->getGapPos2())
1377                     {
1378                         seq[i - firstRes] = '-';
1379                     }
1380                     else
1381                     {
1382                         seq[i - firstRes] = val;
1383                     }
1384                 }
1385
1386                 //for(i = 1; i <= lastRes; i++) //- nige
1387                 for(i = 1; i <= length; i++) //- nige
1388                 {
1389                     (*gdeOutFile) << seq[i - 1];
1390                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1391                     if((i % lineLength == 0) || (i == length)) //- nige
1392                     {
1393                         (*gdeOutFile) << "\n";
1394                     }
1395                 }
1396             }
1397             if (userParameters->getStructPenalties2() != NONE && 
1398                 userParameters->getUseSS2() == true) 
1399             {
1400                 (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left
1401                               << alignPtr->getSecStructName2() << "\n";
1402                 
1403                 vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();    
1404                 //for(i = firstRes; i < firstRes + lastRes; i++) //- nige
1405                 for(i = firstRes; i < length; i++) //- nige
1406                 {
1407                     val = _gapPenaltyMask2->at(i-1);
1408                     if (val == userParameters->getGapPos1() || 
1409                         val == userParameters->getGapPos2())
1410                     {
1411                         seq[i - firstRes] = '-';
1412                     }
1413                     else
1414                     {
1415                         seq[i - firstRes] = val;
1416                     }
1417                 }
1418
1419                 //for(i = 1; i <= lastRes; i++) //- nige
1420                 for(i = 1; i <= length; i++) //- nige
1421                 {
1422                     (*gdeOutFile) << seq[i - 1];
1423                     //if((i % lineLength == 0) || (i == lastRes)) //- nige
1424                     if((i % lineLength == 0) || (i == length)) //- nige
1425                     {
1426                         (*gdeOutFile) << "\n";
1427                     }
1428                 }
1429             }
1430         }
1431         gdeOutFile->close();   
1432     }
1433     catch(const bad_alloc& e)
1434     {
1435         gdeOutFile->close();
1436         cerr << "A bad_alloc exception has occured in the gdeOut function.\n"
1437              << e.what() << "\n";
1438         exit(1);
1439     }
1440     catch(VectorOutOfRange e)
1441     {
1442         gdeOutFile->close();
1443         cerr << "An exception has occured in the gdeOut function.\n"
1444              << e.what() << "\n";
1445         exit(1);    
1446     }
1447     catch(...)
1448     {
1449         gdeOutFile->close();
1450         cerr << "An exception has occured in the gdeOut function.\n";
1451         exit(1);    
1452     }     
1453 }
1454
1455 void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput)
1456 {
1457     char residue;
1458     int val;
1459     int i, ii;
1460     int j, slen;    
1461     int lineLength;
1462     rangeNum rnum;
1463     int firstRes = partToOutput._firstRes;
1464     int lastRes = partToOutput._lastRes;
1465     int firstSeq = partToOutput._firstSeq;
1466     int lastSeq = partToOutput._lastSeq;
1467             
1468     try
1469     {
1470         int _maxLength = alignPtr->getMaxAlnLength();
1471         vector<char> sequence;
1472         sequence.assign(_maxLength + 1, '0');
1473         const SeqArray* alignment = alignPtr->getSeqArray();
1474         // decide the line length for this alignment - maximum is LINELENGTH 
1475         lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1476         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1477         if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07
1478         {
1479             lineLength = LINELENGTH;
1480         }
1481         
1482         // Get name prefix. DL if DNA, P1 if protein
1483         string namePrefix;
1484         bool _dnaFlag = userParameters->getDNAFlag();
1485         namePrefix = _dnaFlag ? ">DL;" : ">P1;";
1486         
1487         //int length = lastRes - firstRes + 1; //- nige
1488
1489         for(ii = firstSeq; ii <= lastSeq; ii++) 
1490         {
1491             i = alignPtr->getOutputIndex(ii - 1);
1492             
1493             (*nbrfOutFile) << namePrefix;
1494             
1495             if (!userParameters->getSeqRange()) 
1496             {
1497                 (*nbrfOutFile) << alignPtr->getName(i) << "\n"
1498                                << alignPtr->getTitle(i) << "\n";
1499             }
1500             else 
1501             {
1502                 findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1503                 (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start
1504                                << "-"<< rnum.end << "\n"
1505                                << alignPtr->getTitle(i) << "\n";
1506             }
1507             slen = 0;
1508             //for(j = firstRes; j < firstRes + lastRes; j++) //- nige
1509             for(j = firstRes; j <= lastRes; j++) //- nige
1510             {
1511                 // NOTE I changed this here!!!!!
1512                 if (j <= alignPtr->getSeqLength(i))
1513                 {
1514                     //val = alignPtr.getResidue(i, j);
1515                     val = (*alignment)[i][j]; // NOTE june29
1516                 }
1517                 else
1518                 { 
1519                     val = -3;
1520                 }
1521                 if((val == -3) || (val == 253))
1522                 {
1523                     break;
1524                 }
1525                 else if((val < 0) || (val > userParameters->getMaxAA()))
1526                 {
1527                     residue = '-';
1528                 }
1529                 else 
1530                 {
1531                     residue = userParameters->getAminoAcidCode(val);
1532                 }
1533                 sequence[j - firstRes] = residue;
1534                 slen++;
1535             }
1536             for(j = 1; j <= slen; j++) 
1537             {
1538                 (*nbrfOutFile) << sequence[j - 1];
1539                 if((j % lineLength == 0) || (j == slen))
1540                 { 
1541                     (*nbrfOutFile) << "\n";
1542                 }
1543             }
1544             (*nbrfOutFile) << "*\n";
1545         }    
1546         nbrfOutFile->close();
1547     }
1548     catch(const bad_alloc& e)
1549     {
1550         nbrfOutFile->close();
1551         cerr << "A bad_alloc exception has occured in the nbrfOut function.\n"
1552              << e.what() << "\n";
1553         exit(1);
1554     }
1555     catch(VectorOutOfRange e)
1556     {
1557         nbrfOutFile->close();
1558         cerr << "An exception has occured in the nbrfOut function.\n"
1559              << e.what() << "\n";
1560         exit(1);    
1561     }
1562     catch(...)
1563     {
1564         nbrfOutFile->close();
1565         cerr << "An exception has occured in the nbrfOut function.\n";
1566         exit(1);    
1567     }    
1568 }
1569
1570 /* 
1571  * The function clustalOut is used to ouput the Alignment into a clustal format
1572  * file.
1573  */
1574 void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput)
1575 {
1576     int firstRes = partToOutput._firstRes;
1577     int lastRes = partToOutput._lastRes;
1578     int firstSeq = partToOutput._firstSeq;
1579     int lastSeq = partToOutput._lastSeq;
1580         
1581     try
1582     {
1583         vector<char> seq1;    
1584         vector<int> seqNo;
1585         vector<int> printSeqNo;    
1586         vector<char> ss_mask1, ss_mask2;
1587         const SeqArray* alignment = alignPtr->getSeqArray();
1588         char  temp[MAXLINE];
1589         char c;
1590         int val;
1591         int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks;
1592         int i, j, k, l;
1593         int pos, ptr;
1594         int lineLength;
1595
1596         rangeNum rnum;
1597         string tmpStr = "";
1598         string sequenceLine = "";
1599
1600
1601         int _numSequences = alignPtr->getNumSeqs();
1602         // PMcG revert to Clustalw1.83 output format for name width
1603         const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY); 
1604
1605         seqNo.assign(_numSequences + 1, 0);
1606         printSeqNo.assign(_numSequences + 1, 0);
1607     
1608         // check that lastSeq <= _numSequences
1609         if(lastSeq > _numSequences)
1610         {
1611             lastSeq = _numSequences;
1612         }
1613     
1614         for (i = firstSeq; i <= lastSeq; i++)
1615         {
1616             printSeqNo[i] = seqNo[i] = 0;
1617             for(j = 1; j < firstRes; j++) 
1618             {
1619                 //val = alignPtr.getResidue(i, j);
1620                 val = (*alignment)[i][j]; // NOTE june29
1621                 if((val >= 0) || (val <= userParameters->getMaxAA()))
1622                 { 
1623                     seqNo[i]++;
1624                 }
1625             }
1626         }
1627
1628         seq1.assign(alignPtr->getMaxAlnLength() + 1, 0);
1629         // Check if we have secondary structure in file 1 and if we want to output it.
1630         if (userParameters->getStructPenalties1() == SECST && 
1631             userParameters->getUseSS1() == true) 
1632         {
1633             int lengthSeq1 = alignPtr->getSeqLength(1);
1634             ss_mask1.assign(lengthSeq1 + 10, 0);
1635             vector<char>* _secStructMask1 = alignPtr->getSecStructMask1();
1636         
1637             for (i = 0; i < lengthSeq1; i++)
1638             {
1639                 ss_mask1[i] = _secStructMask1->at(i);
1640             }
1641             printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1);
1642           
1643         }
1644         
1645         // Check if we have secondary structure in file 2 and if we want to output it.
1646         if (userParameters->getStructPenalties2() == SECST && 
1647             userParameters->getUseSS2() == true &&
1648             userParameters->getProfile2Empty() == false)
1649         {
1650             // AW added test getProfile2Empty was meant to fix bug
1651             // 141, but only prevents segfault, output still faulty.
1652             // Has to be fixed somewhere else
1653             int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1;
1654             int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq);
1655             ss_mask2.assign(lengthSeqProfile2 + 10, 0);
1656             vector<char>* _secStructMask2 = alignPtr->getSecStructMask2();
1657             for (i = 0; i < lengthSeqProfile2; i++)
1658             {
1659                 ss_mask2[i] = _secStructMask2->at(i);
1660             }
1661             printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2);
1662         }
1663     
1664         (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel() 
1665                           << " multiple sequence alignment\n\n";
1666     
1667         // decide the line length for this alignment - maximum is LINELENGTH
1668
1669         //PMcG  9-2-2008 make line output same as 1.83
1670         //lineLength = PAGEWIDTH - alignPtr->getMaxNames();
1671         lineLength = PAGEWIDTH - NAMESWIDTH;
1672         lineLength = lineLength - lineLength % 10; // round to a multiple of 10
1673         if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07
1674         { 
1675             lineLength = LINELENGTH;
1676         }
1677     
1678         int length = lastRes - firstRes + 1; //- nige
1679
1680         chunks = length / lineLength; //- nige
1681         if(length % lineLength != 0) //- nige
1682         {
1683           ++chunks;
1684         }
1685         //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n",
1686         //       firstRes, lastRes, length, chunks);
1687
1688         // This will loop through each of the blocks.
1689         for(lv1 = 1; lv1 <= chunks; ++lv1) 
1690         {
1691             // pos is begining of chunk, ptr is the end of the chunk to be displayed.
1692             pos = ((lv1 - 1) * lineLength) + 1; 
1693             ptr = (length < pos + lineLength - 1) ? length : pos + lineLength - 1; //- nige
1694    
1695             (*clustalOutFile) << "\n";
1696             int _outStructPenalty = userParameters->getOutputStructPenalties();
1697             
1698             string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07
1699             
1700             if((int)secStructName1.size() > MAXNAMESTODISPLAY)
1701             {
1702                 //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY);
1703                 secStructName1 = secStructName1.substr(0, NAMESWIDTH);
1704             }            
1705             if (_outStructPenalty == 0 || _outStructPenalty == 2) 
1706             {
1707                 if (userParameters->getStructPenalties1() == SECST && 
1708                     userParameters->getUseSS1() == true) 
1709                 {
1710                     for(i = pos; i <= ptr; ++i) 
1711                     {
1712                         // Check if we can access mask position first
1713                         if((int)ss_mask1.size() > i + firstRes - 2)  
1714                         {
1715                             val = ss_mask1[i + firstRes - 2]; 
1716                             if (val == userParameters->getGapPos1() || 
1717                                 val == userParameters->getGapPos2())
1718                             {
1719                                 temp[i - pos] = '-';
1720                             }
1721                             else
1722                             {
1723                                 temp[i - pos] = val;
1724                             }
1725                         }
1726                         else
1727                         {
1728                             break;
1729                         }
1730                     }
1731                     temp[i - pos] = EOS;  
1732                                       
1733                     if(userParameters->getSeqRange())
1734                     {
1735                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1736                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1737                                                             clusSecStructOffset)
1738                                           << left << secStructName1 
1739                                           << "  " << temp << "\n";
1740                     }
1741                     else
1742                     {
1743                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1744                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1745                                           << left << secStructName1 << "  " 
1746                                           << temp << "\n";
1747                     }
1748                 }
1749             }
1750             if (_outStructPenalty == 1 || _outStructPenalty == 2) 
1751             {
1752                 if (userParameters->getStructPenalties1() != NONE && 
1753                     userParameters->getUseSS1() == true) 
1754                 {
1755                     vector<char>* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1();       
1756                     for(i = pos; i <= ptr; ++i) 
1757                     {
1758                         if((int)_gapPenaltyMask1->size() > i + firstRes - 2) 
1759                         {
1760                             val = _gapPenaltyMask1->at(i + firstRes - 2); 
1761                             if (val == userParameters->getGapPos1() || 
1762                                 val == userParameters->getGapPos2())
1763                             {
1764                                 temp[i - pos] = '-';
1765                             }
1766                             else
1767                             {
1768                                 temp[i - pos] = val;
1769                             }
1770                         }
1771                         else
1772                         {
1773                             break;
1774                         }
1775                     }
1776                     temp[i - pos] = EOS;
1777                     
1778                     //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1779                     (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1780                                       << secStructName1 << "  "
1781                                       << temp << "\n";
1782                 }
1783             }
1784             
1785             
1786             string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07
1787             //if((int)secStructName2.size() > MAXNAMESTODISPLAY)
1788             if((int)secStructName2.size() > NAMESWIDTH)
1789             {
1790                 //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY);
1791                 secStructName2 = secStructName2.substr(0, NAMESWIDTH);
1792             }
1793                                 
1794             if (_outStructPenalty == 0 || _outStructPenalty == 2) 
1795             {
1796                 if (userParameters->getStructPenalties2() == SECST && 
1797                     userParameters->getUseSS2() == true) 
1798                 {
1799                     for(i = pos; i <= ptr; ++i) 
1800                     {
1801                         if((int)ss_mask2.size() > i + firstRes - 2)
1802                         {
1803                             val = ss_mask2[i + firstRes - 2]; 
1804                             if (val == userParameters->getGapPos1() || 
1805                                 val == userParameters->getGapPos2())
1806                             {
1807                                 temp[i - pos] = '-';
1808                             }
1809                             else
1810                             {
1811                                 temp[i - pos] = val;
1812                             }
1813                         }
1814                         else
1815                         {
1816                             break;
1817                         }
1818                     }
1819                     temp[i - pos] = EOS;
1820                     
1821                                         
1822                     if (userParameters->getSeqRange())
1823                     {
1824                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY +
1825                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH +
1826                                                             clusSecStructOffset)
1827                                           << left << secStructName2; 
1828                         (*clustalOutFile) << "  " << temp << "\n";
1829                     }
1830                     else
1831                     {
1832                         //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY)
1833                         (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH)
1834                                           << left << secStructName2 << "  " 
1835                                           << temp << "\n";
1836                     }
1837                 }
1838             }
1839             
1840             if (_outStructPenalty == 1 || _outStructPenalty == 2) 
1841             {
1842                 if (userParameters->getStructPenalties2() != NONE && 
1843                     userParameters->getUseSS2() == true) 
1844                 {
1845                     vector<char>* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2();
1846                     for(i = pos; i <= ptr; ++i) 
1847                     {
1848                         if((int)_gapPenaltyMask2->size() > i + firstRes - 2)
1849                         {
1850                             val = _gapPenaltyMask2->at(i + firstRes - 2); 
1851                             if (val == userParameters->getGapPos1() || 
1852                                 val == userParameters->getGapPos2())
1853                             {
1854                                 temp[i - pos] = '-';
1855                             }
1856                             else
1857                             {
1858                                 temp[i - pos] = val;
1859                             }
1860                         }
1861                         else
1862                         {
1863                             break;
1864                         }
1865                     }
1866                     temp[i - pos] = EOS;
1867                     
1868                     //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left
1869                     (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left
1870                                       << secStructName2 << "  "
1871                                       << temp << "\n";
1872                 }
1873             }
1874         
1875             for(ii = firstSeq; ii <= lastSeq; ++ii) 
1876             {
1877                 i = alignPtr->getOutputIndex(ii - 1); 
1878                 printSeqNo[i] = 0;
1879                 for(j = pos; j <= ptr; ++j) 
1880                 {
1881                     if (j + firstRes - 1 <= alignPtr->getSeqLength(i))
1882                     {
1883                         //val = alignPtr.getResidue(i, j + firstRes - 1);
1884                         val = (*alignment)[i][j + firstRes - 1]; // NOTE june29
1885                     }
1886                     else
1887                     { 
1888                         val = -3;
1889                     }
1890                     if((val == -3) || (val == 253)) 
1891                     {
1892                         break;
1893                     }
1894                     else if((val < 0) || (val > userParameters->getMaxAA()))
1895                     {
1896                         seq1[j] = '-';
1897                     }
1898                     else 
1899                     {
1900                         seq1[j] = userParameters->getAminoAcidCode(val);
1901                         seqNo[i]++;
1902                         printSeqNo[i] = 1;
1903                     } 
1904                 }
1905                 for(; j <= ptr; ++j) 
1906                 {
1907                     seq1[j]='-';
1908                 }
1909             
1910                 sequenceLine = "";
1911
1912                 for(int index = pos; index < ptr + 1; index++)
1913                 {
1914                     sequenceLine += seq1[index];
1915                 }
1916                 
1917                 string seqName = alignPtr->getName(i);
1918                 //if((int)seqName.size() > MAXNAMESTODISPLAY)
1919                 if((int)seqName.size() > NAMESWIDTH)
1920                 {
1921                     //seqName = seqName.substr(0, MAXNAMESTODISPLAY);
1922                     seqName = seqName.substr(0, NAMESWIDTH);
1923                 }
1924                 
1925                 if (!userParameters->getSeqRange()) 
1926                 {
1927                     // NOTE I made a change here from + 5, to + 6.
1928                     //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left
1929                     //                  << alignPtr->getName(i);
1930
1931                     //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left
1932                     //                  << seqName; 
1933                     // PMcG changed this to revert to behaviour of clustalw1.83
1934                     // 
1935                     //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6)
1936                     (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName; 
1937
1938
1939                 }
1940                 else // Put the sequence range information in! 
1941                 {
1942                     findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii);
1943                     string nameOnly = nameonly(seqName);
1944                     std::stringstream ss;
1945                     std::stringstream ss2;
1946                     string rangeStart;
1947                     string rangeEnd;
1948                     ss << rnum.start;
1949                     ss >> rangeStart;
1950                     ss2 << rnum.end;
1951                     ss2 >> rangeEnd;
1952                     tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd;
1953                     //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset) 
1954                     (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset) 
1955                                       << left << tmpStr;
1956                 }
1957             
1958                 (*clustalOutFile) << sequenceLine;
1959             
1960                 if (userParameters->getClSeqNumbers() && printSeqNo[i])
1961                 {
1962                     (*clustalOutFile) << " " << seqNo[i];
1963                 }
1964             
1965                 (*clustalOutFile) << "\n";
1966             }
1967         
1968             // Now print out the conservation information!
1969             for(i = pos; i <= ptr; ++i) 
1970             {
1971                 seq1[i] = ' ';
1972                 ident = 0;
1973
1974                 for(j = 1; j <= (int)strongGroup.size(); j++)
1975                 {
1976                     catident1[j - 1] = 0;
1977                 }
1978                 for(j = 1; j <= (int)weakGroup.size(); j++) 
1979                 {
1980                     catident2[j - 1] = 0;
1981                 }
1982                 
1983                 for(j = firstSeq; j <= lastSeq; ++j) 
1984                 {
1985                     if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) &&
1986                        (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq)))
1987                     {// NOTE june29
1988                         if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) && 
1989                             ((*alignment)[firstSeq][i + firstRes - 1] <=
1990                             userParameters->getMaxAA())) 
1991                         {
1992                             // Count how many are identical to the first sequence
1993                             if((*alignment)[firstSeq][i + firstRes - 1] == 
1994                                 (*alignment)[j][i + firstRes - 1])
1995                             {
1996                                 ++ident;
1997                             }
1998                             // Count how many are in the same category.
1999                             for(k = 1; k <= (int)strongGroup.size(); k++) 
2000                             {
2001                                 for(l = 0; (c = strongGroup[k - 1][l]); l++) 
2002                                 {
2003                                     int resCode = (*alignment)[j][i + firstRes - 1];
2004                                     if(resCode <= userParameters->getMaxAA() + 1)
2005                                     {
2006                                         if (userParameters->getAminoAcidCode(resCode) == c)
2007                                         {
2008                                             catident1[k - 1]++;
2009                                             break;
2010                                         }
2011                                     }
2012                                 }
2013                             }
2014                             for(k = 1; k <= (int)weakGroup.size(); k++) 
2015                             {
2016                                 for(l = 0; (c = weakGroup[k - 1][l]); l++) 
2017                                 {//NOTE june29
2018                                     int resCode = (*alignment)[j][i + firstRes - 1];
2019                                     if(resCode <= userParameters->getMaxAA() + 1)
2020                                     {
2021                                         if (userParameters->getAminoAcidCode(resCode) == c)
2022                                         {
2023                                             catident2[k - 1]++;
2024                                             break;
2025                                         }
2026                                     }
2027                                 }
2028                             }
2029                         }
2030                     }    
2031                 }
2032             
2033                 // Now do the conservation part for each block. 
2034                 if(ident == lastSeq - firstSeq + 1)
2035                 {
2036                     seq1[i] = '*'; // All residues the same!
2037                 }
2038                 else if (!userParameters->getDNAFlag()) 
2039                 {
2040                     for(k = 1; k <= (int)strongGroup.size(); k++) 
2041                     {
2042                         if (catident1[k - 1] == lastSeq - firstSeq + 1) 
2043                         {
2044                             seq1[i] = ':'; // All residues member of the same category
2045                             break;
2046                         }
2047                     }
2048                     if(seq1[i] == ' ')
2049                     {
2050                         for(k = 1; k <= (int)weakGroup.size(); k++) 
2051                         {
2052                             if (catident2[k - 1] == lastSeq - firstSeq + 1) 
2053                             {
2054                                 seq1[i] = '.'; // All residues member of the same category
2055                                 break;
2056                             }
2057                         }
2058                     }
2059                 }
2060             }
2061
2062             sequenceLine = "";
2063             for(int index = pos; index < ptr + 1; index++)
2064             {
2065                 sequenceLine += seq1[index];
2066             }
2067
2068             //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++)
2069             for(k = 0; k < NAMESWIDTH + 6; k++)
2070             { 
2071                 (*clustalOutFile) << " ";
2072             }
2073             if(userParameters->getSeqRange()) 
2074             {
2075
2076                 (*clustalOutFile) << "          ";
2077             }
2078
2079             (*clustalOutFile) << sequenceLine << "\n";
2080         }
2081         clustalOutFile->close();
2082     }
2083     catch(const bad_alloc& e)
2084     {
2085         clustalOutFile->close();
2086         cerr << "A bad_alloc exception has occured in the clustalOut function.\n"
2087              << e.what() << "\n";
2088         exit(1);
2089     }
2090     catch(VectorOutOfRange e)
2091     {
2092         clustalOutFile->close();
2093         cerr << "An exception has occured in the clustalOut function.\n"
2094              << e.what() << "\n";
2095         exit(1);    
2096     }
2097     catch(exception e)
2098     {
2099         clustalOutFile->close();
2100         cerr << "An exception has occured in the clustalOut function.\n"
2101              << e.what() << "\n";
2102         exit(1);    
2103     }
2104     catch(...)
2105     {
2106         clustalOutFile->close();
2107         cerr << "An exception has occured in the clustalOut function.\n";
2108         exit(1);    
2109     }    
2110 }
2111
2112 /*
2113  * The function printSecStructMask takes in the 2 mask vectors. It makes changes to 
2114  * mask and puts the result in structMask. This is the one that will be printed out.
2115  * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no
2116  * way to do random access setting in vectors otherwise.
2117  */
2118 void AlignmentOutput::printSecStructMask(int prfLength, vector<char>* mask, 
2119                                          vector<char>* structMask)
2120 {
2121     int i, j;
2122 //    calculate the gap penalty mask from the secondary structures
2123
2124     i = 0;
2125     try
2126     {
2127         while (i < prfLength) 
2128         {
2129             if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') 
2130             {
2131                 for (j = 0; j < userParameters->getHelixEndMinus(); j++) 
2132                 {
2133                     if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a'
2134                                     && mask->at(i+j) != '$')) 
2135                     {    
2136                         break;
2137                     }
2138                     (*structMask)[i+j] = 'a';
2139                 }
2140                 i += j;
2141                 while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') 
2142                 {
2143                     if (i >= prfLength) 
2144                         break;
2145                     if (mask->at(i) == '$') 
2146                     {
2147                         (*structMask)[i] = 'A';
2148                         i++;
2149                         break;
2150                     }
2151                     else 
2152                         (*structMask)[i] = (*mask)[i];
2153                     i++;
2154                 }
2155                 for (j = 0; j < userParameters->getHelixEndMinus(); j++) 
2156                 {
2157                     if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a'
2158                                         || mask->at(i-j-1) == '$'))
2159                     {
2160                         (*structMask)[i - j - 1] = 'a';
2161                     }
2162                 }
2163             }
2164             else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') 
2165             {
2166                 for (j = 0; j < userParameters->getStrandEndMinus(); j++) 
2167                 {
2168                     if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b'
2169                                         && mask->at(i+j) != '%')) 
2170                     {
2171                         break;
2172                     }
2173                     (*structMask)[i+j] = 'b';
2174                 }
2175                 i += j;
2176                 while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') 
2177                 {
2178                     if (i >= prfLength) 
2179                         break;
2180                     if (mask->at(i) == '%') 
2181                     {
2182                         (*structMask)[i] = 'B';
2183                         i++;
2184                         break;
2185                     }
2186                     else
2187                     { 
2188                         (*structMask)[i] = mask->at(i);
2189                     }
2190                     i++;
2191                 }
2192                 for (j = 0; j < userParameters->getStrandEndMinus(); j++) 
2193                 {
2194                     if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' || 
2195                         mask->at(i-j-1) == '%'))
2196                     {
2197                         (*structMask)[i-j-1] = 'b';
2198                     }
2199                 }
2200             }
2201             else
2202             { 
2203                 i++;
2204             }
2205         }
2206     }
2207     catch(const exception& e)
2208     {
2209         // Catch all the exceptions
2210         cerr << "There has been an exception in the function printSecStructMask\n"
2211              << e.what() << "\n";
2212         exit(1);
2213     }
2214 }
2215
2216 /*
2217  * The function findRangeValues is used to find the range to be printed out.
2218  *
2219  *
2220  */
2221 void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes, 
2222                                 int len, int firstSeq)
2223 {
2224     assert(rnum);
2225     assert(firstRes > 0);
2226     assert(len > 0);
2227     assert(firstSeq > 0);
2228     
2229     try
2230     {
2231         int val;
2232         int i, ii;
2233         int j, slen;    
2234
2235         char tmpName[FILENAMELEN + 15];
2236         int iStart = 0;
2237         int iEnd = 0; // to print sequence start-end with names
2238         int found = 0;
2239         int ngaps = 0;
2240         int tmpStart = 0; 
2241         int tmpEnd = 0;
2242         int ntermgaps = 0;
2243         int pregaps = 0;
2244         int tmpk = 0;
2245         int isRange = 0;
2246         int formula = 0;
2247         const SeqArray* alignment = alignPtr->getSeqArray();
2248         
2249         tmpName[0] = '\0';
2250         slen = 0;
2251
2252         ii = firstSeq ;
2253         i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere! 
2254         string name = alignPtr->getName(i);
2255         
2256         if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3)) 
2257         {
2258             isRange = 1;
2259         }
2260         
2261         for(tmpk = 1; tmpk < firstRes; tmpk++)
2262         { 
2263             // do this irrespective of above sscanf
2264             //val = alignPtr.getResidue(i, tmpk);
2265             val = (*alignment)[i][tmpk]; // NOTE june29
2266             if ((val < 0) || (val > userParameters->getMaxAA()))
2267             { 
2268                 //it is gap
2269                 pregaps++;
2270             }
2271         }
2272         
2273         for(j = firstRes; j < firstRes + len; j++) 
2274         {
2275             if(j > alignPtr->getSeqLength(i))
2276             {
2277                 val = -3; // Cant get it so break out.
2278             }
2279             else
2280             {
2281                 //val = alignPtr.getResidue(i, j);
2282                 val = (*alignment)[i][j]; // NOTE june29
2283             }
2284             if((val == -3) || (val == 253))
2285             {
2286                 break;
2287             }
2288             else if((val < 0) || (val > userParameters->getMaxAA())) 
2289             {
2290                 ngaps++;
2291             }
2292             else 
2293             {
2294                 found = j;
2295             }
2296             if ( found && (iStart == 0) ) 
2297             {
2298                 iStart = found;
2299                 ntermgaps = ngaps;
2300             }
2301             slen++;
2302         }
2303         if(userParameters->getSeqRange()) 
2304         {
2305             cout << "Name : " << alignPtr->getName(i) << " "
2306                  << "\n  firstRes = "<< firstRes << " "
2307                  << "   len = " << len << " "
2308                  << "\n  iStart = " << iStart << " "
2309                  << "\n  tmpStart = " << tmpStart << " "
2310                  << "\n  ngaps = " << ngaps << " "
2311                  << "\n  pregaps = " << pregaps << " ";
2312             if (!isRange)
2313             {
2314                 formula = iStart - pregaps;
2315             }
2316             else
2317             {
2318                 formula = iStart - pregaps +  ( tmpStart == 1 ? 0: tmpStart-1) ;
2319             }
2320
2321             cout << "\n\nsuggestion  iStart - pregaps + tmpStart - ntermgaps = "
2322                  << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps
2323                  << " formula " << formula << " ";
2324         }
2325         else 
2326         {
2327             cerr << "\n no range found .... strange,  iStart = " << iStart;
2328             formula = 1;
2329         }
2330         if (pregaps == firstRes - 1) // all gaps -  now the conditions........ 
2331         {
2332             formula = tmpStart ; // keep the previous start... 
2333         }
2334         formula = (formula <= 0) ? 1: formula;
2335         if (pregaps == 0 && tmpStart == 0) 
2336         {
2337             formula = firstRes;
2338         }
2339         iEnd = formula + len - ngaps -1;
2340         rnum->start = formula;
2341         rnum->end = iEnd;
2342         cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start 
2343              << " - " << rnum->end;
2344         cout << " Done checking.........";
2345     }
2346     catch(VectorOutOfRange e)
2347     {
2348         cerr << "An exception has occured in the findRangeValues function.\n"
2349              << e.what() << "\n";
2350         exit(1);    
2351     }    
2352     catch(...)
2353     {
2354         cerr << "An exception has occured in findRangeValues function\n";
2355         exit(1);
2356     }
2357 }
2358
2359 string AlignmentOutput::nameonly(string s)
2360 {
2361     string tmp;
2362     try
2363     {
2364         int i = 0;
2365
2366         while (i < (int)s.size()) 
2367         {
2368             if(s.at(i) != '/')
2369             {
2370                 tmp += s.at(i);
2371                 i++;
2372             }
2373             else
2374             {
2375                 break;
2376             }
2377         }
2378     }
2379     catch(const exception& e)
2380     {
2381         cerr << "An exception has occured in the function nameonly\n"
2382              << e.what();
2383         exit(1);
2384     }
2385     return tmp;
2386 }
2387
2388 int AlignmentOutput::SeqGCGCheckSum(vector<char>* seq, int length)
2389 {
2390     int i;
2391     long check;
2392     int seqResIndex = 1;
2393     //int seqLength = seq->size();
2394     
2395     for (i = 0, check = 0; i < length; i++, seqResIndex++)
2396     {
2397         char _toUpperCase = (*seq)[seqResIndex];
2398         check += ((i % 57) + 1) * toupper(_toUpperCase);
2399     }
2400
2401     return (check % 10000);
2402 }
2403
2404 void AlignmentOutput::showAlign()
2405 {
2406     //FILE *file;
2407     int  numLines;
2408     char temp[MAXLINE + 1];
2409     temp[0] = '\0';
2410     string fileName;
2411     string answer;
2412     
2413     if(userParameters->getOutputClustal()) 
2414     {
2415         fileName = clustalOutName;
2416     }
2417     else if(userParameters->getOutputNbrf()) 
2418     {
2419         fileName = nbrfOutName;
2420     }
2421     else if(userParameters->getOutputGCG()) 
2422     {
2423         fileName = gcgOutName;
2424     }
2425     else if(userParameters->getOutputPhylip()) 
2426     {
2427         fileName = phylipOutName;
2428     }
2429     else if(userParameters->getOutputGde()) 
2430     {
2431         fileName = gdeOutName;
2432     }
2433     else if(userParameters->getOutputNexus()) 
2434     {
2435         fileName = nexusOutName;
2436     }
2437     else if(userParameters->getOutputFasta()) 
2438     {
2439         fileName = fastaOutName;
2440     }
2441     else
2442     {
2443         return; // No file output type!
2444     }
2445
2446     ifstream _fileIn;
2447     _fileIn.open(fileName.c_str(), ios::in);        
2448     _fileIn.seekg(0, std::ios::beg); // start at the beginning
2449     
2450     cout << "\n\n";
2451     numLines = 0;
2452
2453     while(_fileIn.getline(temp, MAXLINE + 1, '\n')) 
2454     {
2455        //fputs(temp,stdout);
2456        cout << temp << "\n";
2457        ++numLines;
2458        if(numLines >= PAGE_LEN) 
2459        {
2460            cout << "\n";
2461            utilityObject->getStr(string("Press [RETURN] to continue or  X  to stop"), answer);
2462            if(toupper(answer[0]) == 'X') 
2463            {
2464                _fileIn.close();
2465                return;
2466            }
2467            else
2468            {
2469                numLines = 0;
2470            }
2471        }
2472     }
2473     _fileIn.close();
2474     cout << "\n";
2475     utilityObject->getStr(string("Press [RETURN] to continue"), answer);
2476 }
2477
2478 }
2479