X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=website%2Farchive%2Fbinaries%2Fmac%2Fsrc%2Fclustalw%2Fsrc%2Falignment%2FAlignmentOutput.cpp;fp=website%2Farchive%2Fbinaries%2Fmac%2Fsrc%2Fclustalw%2Fsrc%2Falignment%2FAlignmentOutput.cpp;h=a0b9de3e5580f47f199f6adb769853e382309e11;hb=dbde3fb6f00b9bb770343631a517c0e599db8528;hp=0000000000000000000000000000000000000000;hpb=85f830bbd51a7277994bd4233141016304e210c9;p=jabaws.git diff --git a/website/archive/binaries/mac/src/clustalw/src/alignment/AlignmentOutput.cpp b/website/archive/binaries/mac/src/clustalw/src/alignment/AlignmentOutput.cpp new file mode 100644 index 0000000..a0b9de3 --- /dev/null +++ b/website/archive/binaries/mac/src/clustalw/src/alignment/AlignmentOutput.cpp @@ -0,0 +1,2479 @@ +/** + * Author: Mark Larkin + * + * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. + */ +/** + * Output routines ported from clustalx C code in 'interface.c'. + * + * Changes: + * + * 18-04-07,Nigel Brown(EMBL): clustalx code used (firsRes, length) convention + * to define output columns, while the port uses (firsRes, lastRes). Fixed + * problems in all 7 output routines where the conventions were mixed giving + * over-long output blocks. + * + * 18-7-07: Mark (UCD), made changes to fastaOut, clustalOut, nbrfOut and gdeOut + * + * 9-2-08: Paul (UCD), changes to clustalOut to make output the same as 1.83 + * added const NAMESWIDTH which is calculated based on the + * length of the longest sequence name and the upper and lower + * limits specified in MAXNAMESTODISPLAY and MINNAMESTODISPLAY + */ + +#ifdef HAVE_CONFIG_H + #include "config.h" +#endif +#include "AlignmentOutput.h" + +namespace clustalw +{ + +AlignmentOutput::AlignmentOutput() +: clusSecStructOffset(9), + clusSequenceOffset(15+1) // MARK made a change here for test case findRangeValues 10seqs +{ + // NOTE in the old clustal these arrays ended with NULL as a value. + try + { + strongGroup.resize(9); + strongGroup[0] = string("STA"); + strongGroup[1] = string("NEQK"); + strongGroup[2] = string("NHQK"); + strongGroup[3] = string("NDEQ"); + strongGroup[4] = string("QHRK"); + strongGroup[5] = string("MILV"); + strongGroup[6] = string("MILF"); + strongGroup[7] = string("HY"); + strongGroup[8] = string("FYW"); + + weakGroup.resize(11); + weakGroup[0] = string("CSA"); + weakGroup[1] = string("ATV"); + weakGroup[2] = string("SAG"); + weakGroup[3] = string("STNK"); + weakGroup[4] = string("STPA"); + weakGroup[5] = string("SGND"); + weakGroup[6] = string("SNDEQK"); + weakGroup[7] = string("NDEQHK"); + weakGroup[8] = string("NEQHRK"); + weakGroup[9] = string("FVLIM"); + weakGroup[10] = string("HFY"); + } + catch(const exception& e) + { + cerr << "An exception has occured in the contructor of AlignmentOutput.\n" + << e.what() << "\n"; + exit(1); + } +} + +/* + * The function createAlignmentOutput is used to call all the individual functions + * for the different file types. It is possible to output the alignment in all file + * types at the same time. + */ +void AlignmentOutput::createAlignmentOutput(Alignment* alignPtr, int firstSeq, int lastSeq) +{ + int length; + int firstRes; // starting sequence range - Ramu + int lastRes; // ending sequence range + bool rangeOK; + + if((firstSeq <= 0) || (lastSeq < firstSeq)) + { + utilityObject->error("Cannot produce alignment output." + " Incorrect call to createAlignmentOutput." + " firstSeq = %d" + " lastSeq = %d\n", firstSeq, lastSeq); + return; + } + + length = 0; + firstRes = 1; + lastRes = 0; + rangeOK = false; + + try + { + length = alignPtr->getLengthLongestSequence(); + lastRes = length; + + if (userParameters->getRangeFromToSet()) + { + firstRes = userParameters->getRangeFrom(); + lastRes = userParameters->getRangeTo(); + // Check if the numbers are ok. + if ((firstRes > lastRes) || (firstRes == -1) || (lastRes == -1)) + { + cerr << "seqrange numbers are not set properly, using default....\n"; + firstRes = 1; + lastRes = length; + } + else + { + rangeOK = true; + } + } + if (rangeOK && (lastRes > length)) + { + lastRes = length; + cout << "Seqrange " << lastRes << " is more than the " << length + << " setting it to " << length << " \n"; + } + + if (userParameters->getMenuFlag()) + { + cout << "Consensus length = " << lastRes << " \n"; + } + + outputRegion partToOutput; + partToOutput._firstSeq = firstSeq; + partToOutput._lastSeq = lastSeq; + partToOutput._firstRes = firstRes; + partToOutput._lastRes = lastRes; + + if(userParameters->getOutputClustal()) + { + if(clustalOutFile.get() != 0 && clustalOutFile->is_open()) + { + clustalOut(alignPtr, partToOutput); + if(clustalOutFile->is_open()) + { + clustalOutFile->close(); + } + utilityObject->info("CLUSTAL-Alignment file created [%s]\n", + clustalOutName.c_str()); + } + } + if(userParameters->getOutputNbrf()) + { + if(nbrfOutFile.get() != 0 && nbrfOutFile->is_open()) + { + nbrfOut(alignPtr, partToOutput); + if(nbrfOutFile->is_open()) + { + nbrfOutFile->close(); + } + utilityObject->info("NBRF/PIR-Alignment file created [%s]\n", + nbrfOutName.c_str()); + } + } + if(userParameters->getOutputGCG()) + { + if(gcgOutFile.get() != 0 && gcgOutFile->is_open()) + { + gcgOut(alignPtr, partToOutput); + if(gcgOutFile->is_open()) + { + gcgOutFile->close(); + } + utilityObject->info("GCG-Alignment file created [%s]\n", + gcgOutName.c_str()); + } + } + if(userParameters->getOutputPhylip()) + { + if(phylipOutFile.get() != 0 && phylipOutFile->is_open()) + { + phylipOut(alignPtr, partToOutput); + if(phylipOutFile->is_open()) + { + phylipOutFile->close(); + } + utilityObject->info("PHYLIP-Alignment file created [%s]\n", + phylipOutName.c_str()); + } + } + + if(userParameters->getOutputGde()) + { + if(gdeOutFile.get() != 0 && gdeOutFile->is_open()) + { + gdeOut(alignPtr, partToOutput); + if(gdeOutFile->is_open()) + { + gdeOutFile->close(); + } + utilityObject->info("GDE-Alignment file created [%s]\n", + gdeOutName.c_str()); + } + } + + if(userParameters->getOutputNexus()) + { + if(nexusOutFile.get() != 0 && nexusOutFile->is_open()) + { + nexusOut(alignPtr, partToOutput); + if(nexusOutFile->is_open()) + { + nexusOutFile->close(); + } + utilityObject->info("NEXUS-Alignment file created [%s]\n", + nexusOutName.c_str()); + } + } + + if(userParameters->getOutputFasta()) + { + if(fastaOutFile.get() != 0 && fastaOutFile->is_open()) + { + fastaOut(alignPtr, partToOutput); + if(fastaOutFile->is_open()) + { + fastaOutFile->close(); + } + utilityObject->info("Fasta-Alignment file created [%s]\n", + fastaOutName.c_str()); + } + } + + // Output the alignment to the screen if it is required. + if (userParameters->getShowAlign() && userParameters->getMenuFlag()) + { + showAlign(); + } + } + catch(const exception& e) + { + cerr << "An exception has occured in the createAlignmentOutput function.\n" + << e.what() << "\n"; + exit(1); + } +} + +bool AlignmentOutput::QTOpenFilesForOutput(AlignmentFileNames fileNames) +{ + if(!userParameters->getOutputClustal() && + !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() && + !userParameters->getOutputPhylip() && !userParameters->getOutputGde() && + !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) + { + utilityObject->error("You must select an alignment output format\n"); + return false; + } + + if(fileNames.clustalFile == "" && fileNames.fastaFile == "" && fileNames.gcgFile == "" + && fileNames.gdeFile == "" && fileNames.nexusFile == "" && fileNames.nrbfFile == "" + && fileNames.phylipFile == "") + { + utilityObject->error("No names for output files. Cannot output alignment.\n"); + return false; + } + + if(fileNames.clustalFile != "") + { + clustalOutName = fileNames.clustalFile; + if(!openExplicitFile(clustalOutFile, clustalOutName)) + { + return false; + } + } + if(fileNames.fastaFile != "") + { + fastaOutName = fileNames.fastaFile; + if(!openExplicitFile(fastaOutFile, fastaOutName)) + { + return false; + } + } + if(fileNames.gcgFile != "") + { + gcgOutName = fileNames.gcgFile; + if(!openExplicitFile(gcgOutFile, gcgOutName)) + { + return false; + } + } + if(fileNames.gdeFile != "") + { + gdeOutName = fileNames.gdeFile; + if(!openExplicitFile(gdeOutFile, gdeOutName)) + { + return false; + } + } + if(fileNames.nexusFile != "") + { + nexusOutName = fileNames.nexusFile; + if(!openExplicitFile(nexusOutFile, nexusOutName)) + { + return false; + } + } + if(fileNames.nrbfFile != "") + { + nbrfOutName = fileNames.nrbfFile; + if(!openExplicitFile(nbrfOutFile, nbrfOutName)) + { + return false; + } + } + if(fileNames.phylipFile != "") + { + phylipOutName = fileNames.phylipFile; + if(!openExplicitFile(phylipOutFile, phylipOutName)) + { + return false; + } + } + return true; +} +/* + * The function openAlignmentOutput opens a file for output. It returns true if it + * has been successful and false if it has not been successful + */ +bool AlignmentOutput::openAlignmentOutput(string path) +{ + if(!userParameters->getOutputClustal() && + !userParameters->getOutputNbrf() && !userParameters->getOutputGCG() && + !userParameters->getOutputPhylip() && !userParameters->getOutputGde() && + !userParameters->getOutputNexus() && !userParameters->getOutputFasta()) + { + utilityObject->error("You must select an alignment output format\n"); + return false; + } + string _fileNameToOutput = path; + if(_fileNameToOutput == "") + { + /*_fileNameToOutput = userParameters->getSeqName();*/ + /* BUG 166 , file extension, FS, 2009-01-26 */ + utilityObject->getPath(userParameters->getSeqName(), &_fileNameToOutput); + } + + if(userParameters->getOutputClustal()) + { + if (userParameters->getOutfileName() != "") + { + clustalOutName = userParameters->getOutfileName(); + if(!openExplicitFile(clustalOutFile, clustalOutName)) + { + return false; + } + } + else + { + clustalOutName = openOutputFile(clustalOutFile, + "\nEnter a name for the CLUSTAL output file ", + _fileNameToOutput, "aln"); + + if(clustalOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputNbrf()) + { + if (userParameters->getOutfileName() != "") + { + nbrfOutName = userParameters->getOutfileName(); + if(!openExplicitFile(nbrfOutFile, nbrfOutName)) + { + return false; + } + } + else + { + nbrfOutName = openOutputFile(nbrfOutFile, + "\nEnter a name for the NBRF/PIR output file", _fileNameToOutput, + "pir"); + + if(nbrfOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputGCG()) + { + if (userParameters->getOutfileName() != "") + { + gcgOutName = userParameters->getOutfileName(); + if(!openExplicitFile(gcgOutFile, gcgOutName)) + { + return false; + } + } + else + { + gcgOutName = openOutputFile(gcgOutFile, + "\nEnter a name for the GCG output file", _fileNameToOutput, "msf"); + + if(gcgOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputPhylip()) + { + if (userParameters->getOutfileName() != "") + { + phylipOutName = userParameters->getOutfileName(); + if(!openExplicitFile(phylipOutFile, phylipOutName)) + { + return false; + } + } + else + { + phylipOutName = openOutputFile(phylipOutFile, + "\nEnter a name for the PHYLIP output file", _fileNameToOutput, "phy"); + + if(phylipOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputGde()) + { + if (userParameters->getOutfileName() != "") + { + gdeOutName = userParameters->getOutfileName(); + if(!openExplicitFile(gdeOutFile, gdeOutName)) + { + return false; + } + } + else + { + gdeOutName = openOutputFile(gdeOutFile, + "\nEnter a name for the GDE output file ", + _fileNameToOutput, "gde"); + + if(gdeOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputNexus()) + { + if (userParameters->getOutfileName() != "") + { + nexusOutName = userParameters->getOutfileName(); + if(!openExplicitFile(nexusOutFile, nexusOutName)) + { + return false; + } + } + else + { + nexusOutName = openOutputFile(nexusOutFile, + "\nEnter a name for the NEXUS output file ", + _fileNameToOutput, "nxs"); + + if(nexusOutName == "") + { + return false; // We have not been successful. + } + } + } + + if(userParameters->getOutputFasta()) + { + if (userParameters->getOutfileName() != "") + { + fastaOutName = userParameters->getOutfileName(); + if(!openExplicitFile(fastaOutFile, fastaOutName)) + { + return false; + } + } + else + { + fastaOutName = openOutputFile(fastaOutFile, + "\nEnter a name for the Fasta output file ", + _fileNameToOutput, "fasta"); + + if(fastaOutName == "") + { + return false; // We have not been successful. + } + } + } + + return true; + +} + + +/* + * The function openExplicitFile is used to open a file when we have the name already. + * It returns true if the file has been opened correctly, false otherwise. + */ +bool AlignmentOutput::openExplicitFile(auto_ptr& outFile, string fileName) +{ + if (fileName == "") + { + cerr << "Bad output file [" << fileName << "]\n"; + utilityObject->error("Bad output file [%s]\n", fileName.c_str()); + return false; + } + + outFile.reset(new ofstream(fileName.c_str(), ofstream::trunc)); + if(!outFile->is_open()) + { + utilityObject->error("Cannot open output file [%s]\n", fileName.c_str()); + return false; + } + return true; +} + +/* + * The function openOutputFile is used to open a file when we dont have the name yet. + * This function returns the name if it has been successful. If it has not been + * successful it returns a blank string "" + */ +string AlignmentOutput::openOutputFile(auto_ptr& outFile, string prompt, + string path, string fileExtension) +{ + string temp; + string _fileName; // Will return this name. + string message; + _fileName = path + fileExtension; + + if(_fileName.compare(userParameters->getSeqName()) == 0) + { + cout << "Output file name is the same as input file.\n"; + if (userParameters->getMenuFlag()) + { + message = "\n\nEnter new name to avoid overwriting [" + _fileName + "]"; + + utilityObject->getStr(message, temp); + if(temp != "") + { + _fileName = temp; + } + } + } + else if (userParameters->getMenuFlag()) + { + + message = prompt + " [" + _fileName + "]"; + utilityObject->getStr(message, temp); + if(temp != "") + { + _fileName = temp; + } + } + + outFile.reset(new ofstream(_fileName.c_str(), ofstream::trunc)); + if(!outFile->is_open()) + { + utilityObject->error("Cannot open output file [%s]\n", _fileName.c_str()); + return ""; + } + return _fileName; +} + +/* + * fastaOut: output the alignment in FASTA format + */ +void AlignmentOutput::fastaOut(Alignment* alignPtr, outputRegion partToOutput) +{ + char residue; + int val; + int i, ii; + int j, slen; + int lineLength; + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + rangeNum rnum; + cout << "firstres = " << firstRes << " lastres = " << lastRes << "\n"; + try + { + const SeqArray* alignment = alignPtr->getSeqArray(); //NOTE june29 + vector sequence; + sequence.assign(lastRes + 1, '0'); + + lineLength = PAGEWIDTH - alignPtr->getMaxNames(); + + lineLength = lineLength - lineLength % 10; // round to a multiple of 10 + if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07 + { + lineLength = LINELENGTH; + } + + + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + slen = 0; + //for(j = firstRes; j < firstRes + lastRes; j++) //- nige + for(j = firstRes; j <= lastRes; j++) //- nige + { + if (j <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '-'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + if (userParameters->getLowercase()) + { + sequence[j - firstRes] = residue; + } + else + { + sequence[j - firstRes] = residue; + } + slen++; + } + (*fastaOutFile) << ">" << nameonly(alignPtr->getName(i)); + if(userParameters->getSeqRange()) + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + (*fastaOutFile) << "/" << rnum.start << "-" << rnum.end; + + } + (*fastaOutFile) << "\n"; + for(j = 1; j <= slen; j++) + { + (*fastaOutFile) << sequence[j-1]; + if((j % lineLength == 0) || (j == slen)) + { + (*fastaOutFile) << "\n"; + } + } + } + fastaOutFile->close(); + cout << "FASTA file created!\n"; + } + catch(const bad_alloc& e) + { + fastaOutFile->close(); + cerr << "A bad_alloc exception has occured in the fastaOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + fastaOutFile->close(); + cerr << "An exception has occured in the fastaOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + fastaOutFile->close(); + cerr << "An exception has occured in the fastaOut function.\n"; + exit(1); + } + +} + +/* + * gcgOut: output the alignment in gcg file format + */ +void AlignmentOutput::gcgOut(Alignment* alignPtr, outputRegion partToOutput) +{ + char residue; + int val; + int i, ii, chunks, block; + int j, k, pos1, pos2; + long grandChecksum; + + try + { + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + rangeNum rnum; + vector sequence; + vector allChecks; + int _maxLength = alignPtr->getMaxAlnLength(); + sequence.assign(_maxLength + 1, '0'); + allChecks.assign(lastSeq + 1, 0); + const SeqArray* alignment = alignPtr->getSeqArray(); + + for(i = firstSeq; i <= lastSeq; i++) + { + //for(j = firstRes; j <= firstRes + lastRes - 1; j++) //- nige + for(j = firstRes; j <= lastRes; j++) //- nige + { + if (j <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '.'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + sequence[j - firstRes + 1] = residue; + } + // pad any short sequences with gaps, to make all sequences the same length + for(; j <= firstRes + lastRes - 1; j++) + { + sequence[j - firstRes + 1] = '.'; + } + allChecks[i] = SeqGCGCheckSum(&sequence, lastRes); + } + int _index; + grandChecksum = 0; + for(i = 1; i <= alignPtr->getNumSeqs(); i++) + { + _index = alignPtr->getOutputIndex(i - 1); + grandChecksum += allChecks[_index]; + } + grandChecksum = grandChecksum % 10000; + + (*gcgOutFile) << "PileUp\n\n"; + (*gcgOutFile) << "\n\n MSF:" << setw(5) << lastRes << " Type: "; + + if(userParameters->getDNAFlag()) + { + (*gcgOutFile) << "N"; + } + else + { + (*gcgOutFile) << "P"; + } + + (*gcgOutFile) << " Check:" << setw(6) << grandChecksum << " .. \n\n"; + float _seqWeight; + + int length = lastRes - firstRes + 1; //- nige + + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + _seqWeight = (alignPtr->getSeqWeight(i - 1) * 100) / (float) INT_SCALE_FACTOR; + + (*gcgOutFile) << " Name: " << alignPtr->getName(i) << " oo Len:" + //<< setw(5) << lastRes << " Check:" << setw(6) << allChecks[i] //- nige + << setw(5) << length << " Check:" << setw(6) << allChecks[i] + << " Weight: " << fixed << setprecision(1) << _seqWeight << "\n"; + } + (*gcgOutFile) << "\n//\n"; + + chunks = length / GCG_LINELENGTH; //- nige + if(length % GCG_LINELENGTH != 0) //- nige + { + ++chunks; + } + + for(block = 1; block <= chunks; block++) + { + (*gcgOutFile) << "\n\n"; + pos1 = ((block - 1) * GCG_LINELENGTH) + 1; + pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1;//- nige + + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + if (!userParameters->getSeqRange()) + { + (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 5) << left + << alignPtr->getName(i) << " "; + } + else + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + std::stringstream ss; + std::stringstream ss2; + string rangeStart; + string rangeEnd; + ss << rnum.start; + ss >> rangeStart; + ss2 << rnum.end; + ss2 >> rangeEnd; + string nameAndRange = nameonly(alignPtr->getName(i)) + "/" + rangeStart; + nameAndRange += "-" + rangeEnd; + (*gcgOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) << left + << nameAndRange; + } + for(j = pos1, k = 1; j <= pos2; j++, k++) + { + + //JULIE - + //check for int sequences - pad out with '.' characters to end + + if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j + firstRes - 1); + val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + residue = '.'; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '.'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + (*gcgOutFile) << residue; + if(j % 10 == 0) + { + (*gcgOutFile) << " "; + } + } + } + } + (*gcgOutFile) << "\n\n"; + gcgOutFile->close(); + } + catch(const bad_alloc& e) + { + gcgOutFile->close(); + cerr << "A bad_alloc exception has occured in the gcgOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + gcgOutFile->close(); + cerr << "An exception has occured in the gcgOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + gcgOutFile->close(); + cerr << "An exception has occured in the gcgOut function.\n"; + exit(1); + } +} + +void AlignmentOutput::phylipOut(Alignment* alignPtr, outputRegion partToOutput) +{ + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + try + { + char residue; + int val; + int i, ii, chunks, block; + int j, k, pos1, pos2; + int nameLen; + bool warn; + vector _seqNames; + const SeqArray* alignment = alignPtr->getSeqArray(); + rangeNum rnum; + + // Push on a blank string. This is because the index starts at 1 for the seqs etc.. + _seqNames.push_back(""); + + nameLen = 0; + + for(i = firstSeq; i <= lastSeq; i++) + { + _seqNames.push_back(alignPtr->getName(i)); + ii = _seqNames.size(); + if(nameLen < ii) + { + nameLen = ii; + } + } + if(nameLen > 10) + { + warn = false; + for(i = 0; i < (int)_seqNames.size() - 1; i++) + { + for(j = i + 1; j < (int)_seqNames.size(); j++) + { + if (_seqNames[i].compare(_seqNames[j]) == 0) + { + warn = true; + } + } + } + if(warn) + { + utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output.\nNames in the PHYLIP format file are NOT unambiguous."); + } + else + { + utilityObject->warning("Truncating sequence names to 10 characters for PHYLIP output."); + } + } + + int length = lastRes - firstRes + 1; //- nige + + chunks = length / GCG_LINELENGTH; //- nige + if(length % GCG_LINELENGTH != 0) //- nige + { + ++chunks; + } + + (*phylipOutFile) << setw(6) << alignPtr->getNumSeqs() << " " + << setw(6) << length; //-nige + + for(block = 1; block <= chunks; block++) + { + pos1 = ((block - 1) * GCG_LINELENGTH) + 1; + //pos2 = (lastRes < pos1 + GCG_LINELENGTH - 1)? lastRes : pos1 + GCG_LINELENGTH - 1; //- nige + pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + if(block == 1) + { + if(!userParameters->getSeqRange()) + { + string name = alignPtr->getName(i); + (*phylipOutFile) << "\n" << setw(10) << left + << name.substr(0,10) << " "; + } + else + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + std::stringstream ss; + std::stringstream ss2; + string rangeStart; + string rangeEnd; + ss << rnum.start; + ss >> rangeStart; + ss2 << rnum.end; + ss2 >> rangeEnd; + string nameAndRange = nameonly(alignPtr->getName(i)) + "/"; + nameAndRange += rangeStart + "-" + rangeEnd; + (*phylipOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) + << left + << nameAndRange; + } + } + else + { + (*phylipOutFile) << "\n "; + } + for(j = pos1, k = 1; j <= pos2; j++, k++) + { + if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j + firstRes - 1); + val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '-'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + (*phylipOutFile) << residue; + if(j % 10 == 0) + { + (*phylipOutFile) << " "; + } + } + } + (*phylipOutFile) << "\n"; + } + } + catch(const bad_alloc& e) + { + phylipOutFile->close(); + cerr << "A bad_alloc exception has occured in the phylipOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + phylipOutFile->close(); + cerr << "An exception has occured in the phylipOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + phylipOutFile->close(); + cerr << "An exception has occured in the phylipOut function.\n"; + exit(1); + } +} + +void AlignmentOutput::nexusOut(Alignment* alignPtr, outputRegion partToOutput) +{ + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + try + { + char residue; + int val; + int i, ii, chunks, block; + int j, k, pos1, pos2; + const SeqArray* alignment = alignPtr->getSeqArray(); + rangeNum rnum; + + int length = lastRes - firstRes + 1; //- nige + + chunks = length / GCG_LINELENGTH; //- nige + if(length % GCG_LINELENGTH != 0) //- nige + { + ++chunks; + } + + (*nexusOutFile) << "#NEXUS\n"; + (*nexusOutFile) << "BEGIN DATA;\n"; + (*nexusOutFile) << "dimensions ntax=" << alignPtr->getNumSeqs() + << " nchar=" << length << ";\n"; //- nige + (*nexusOutFile) << "format missing=?\n"; + (*nexusOutFile) << "symbols=\""; + + for(i = 0; i <= userParameters->getMaxAA(); i++) + { + (*nexusOutFile) << userParameters->getAminoAcidCode(i); + } + (*nexusOutFile) << "\"\n"; + (*nexusOutFile) << "interleave datatype="; + bool _dnaFlag = userParameters->getDNAFlag(); + string _type = _dnaFlag ? "DNA " : "PROTEIN "; + (*nexusOutFile) << _type; + (*nexusOutFile) << "gap= -;\n"; + (*nexusOutFile) << "\nmatrix"; + + for(block = 1; block <= chunks; block++) + { + pos1 = ((block - 1) * GCG_LINELENGTH) + 1; + pos2 = (length < pos1 + GCG_LINELENGTH - 1)? length : pos1 + GCG_LINELENGTH - 1; //- nige + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + if (!userParameters->getSeqRange()) + { + (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 1) << left + << alignPtr->getName(i) << " "; + } + else + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + + std::stringstream ss; + std::stringstream ss2; + string rangeStart; + string rangeEnd; + ss << rnum.start; + ss >> rangeStart; + ss2 << rnum.end; + ss2 >> rangeEnd; + string nameAndRange = nameonly(alignPtr->getName(i)) + "/"; + nameAndRange += rangeStart + "-" + rangeEnd; + (*nexusOutFile) << "\n" << setw(alignPtr->getMaxNames() + 15) + << left + << nameAndRange; + } + for(j = pos1, k = 1; j <= pos2; j++, k++) + { + if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j + firstRes - 1); + val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '-'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + (*nexusOutFile) << residue; + } + } + (*nexusOutFile) << "\n"; + } + (*nexusOutFile) << ";\nend;\n"; + } + catch(const bad_alloc& e) + { + nexusOutFile->close(); + cerr << "A bad_alloc exception has occured in the nexusOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + nexusOutFile->close(); + cerr << "An exception has occured in the nexusOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + nexusOutFile->close(); + cerr << "An exception has occured in the nexusOut function.\n"; + exit(1); + } +} + +/* + * gdeOut: print out the alignment in gde file format. + */ +void AlignmentOutput::gdeOut(Alignment* alignPtr, outputRegion partToOutput) +{ + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + + int length = lastRes - firstRes + 1; //- nige + + try + { + char residue; + int val; + vector _ssMask1, _ssMask2, seq; + int i, ii; + int j, slen; + int lineLength; + const SeqArray* alignment = alignPtr->getSeqArray(); + rangeNum rnum; + + seq.assign(alignPtr->getMaxAlnLength() + 1, '0'); + + // decide the line length for this alignment - maximum is LINELENGTH + lineLength = PAGEWIDTH - alignPtr->getMaxNames(); + lineLength = lineLength - lineLength % 10; // round to a multiple of 10 + if (lineLength > LINELENGTH || lineLength <= 0) + { + lineLength = LINELENGTH; + } + + // If we are using the secondary structure info from profile 1, set it up + if (userParameters->getStructPenalties1() == SECST && + userParameters->getUseSS1() == true) + { + int _lengthSeq1 = alignPtr->getSeqLength(1); + vector* _secStructMask1 = alignPtr->getSecStructMask1(); + _ssMask1.assign(_lengthSeq1 + 10, 0); + + for (i = 0;i < _lengthSeq1;i++) + { + _ssMask1[i] = _secStructMask1->at(i); + } + printSecStructMask(_lengthSeq1, _secStructMask1, &_ssMask1); + } + + // If we are using the secondary structure info from profile 2, set it up + if (userParameters->getStructPenalties2() == SECST && + userParameters->getUseSS2() == true) + { + int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1; + int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq); + _ssMask2.assign(lengthSeqProfile2 + 10, 0); + vector* _secStructMask2 = alignPtr->getSecStructMask2(); + + for (i=0;i < lengthSeqProfile2;i++) + { + _ssMask2[i] = _secStructMask2->at(i); + } + printSecStructMask(lengthSeqProfile2, _secStructMask2, &_ssMask2); + } + + bool _dnaFlag = userParameters->getDNAFlag(); + string _prefix = _dnaFlag ? "#" : "%"; + + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + + (*gdeOutFile) << _prefix; + if(!userParameters->getSeqRange()) + { + (*gdeOutFile) << alignPtr->getName(i) << "\n"; + } + else + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + (*gdeOutFile) << nameonly(alignPtr->getName(i)) << "/" + << rnum.start << "-" << rnum.end << "\n"; + } + slen = 0; + + //for(j = firstRes; j < firstRes + lastRes; j++) //- nige + for(j = firstRes; j <= lastRes; j++) //- nige + { + if (j <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '-'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + if (userParameters->getLowercase()) + { + seq[j - firstRes] = (char)tolower((int)residue); + } + else + { + seq[j - firstRes] = residue; + } + slen++; + } + for(j = 1; j <= slen; j++) + { + (*gdeOutFile) << seq[j-1]; + if((j % lineLength == 0) || (j == slen)) + { + (*gdeOutFile) << "\n"; + } + } + } + + if (userParameters->getOutputStructPenalties() == 0 || + userParameters->getOutputStructPenalties() == 2) + { + if (userParameters->getStructPenalties1() == SECST && + userParameters->getUseSS1() == true) + { + (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left + << alignPtr->getSecStructName1() << "\n"; + + //for(i = firstRes; i < firstRes + lastRes; i++) //- nige + for(i = firstRes; i <= lastRes; i++) //- nige + { + val = _ssMask1[i - 1]; + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + seq[i - firstRes] = '-'; + } + else + { + seq[i - firstRes] = val; + } + } + + for(i = 1; i <= lastRes; i++) + { + (*gdeOutFile) << seq[i-1]; + if((i % lineLength == 0) || (i == lastRes)) + { + (*gdeOutFile) << "\n"; + } + } + } + + if (userParameters->getStructPenalties2() == SECST && + userParameters->getUseSS2() == true) + { + (*gdeOutFile) << "\"SS_" << setw(alignPtr->getMaxNames()) << left + << alignPtr->getSecStructName2() << "\n"; + + //for(i = firstRes; i < firstRes + lastRes; i++) //- nige + for(i = firstRes; i <= lastRes; i++) //- nige + { + val = _ssMask2[i - 1]; + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + seq[i - firstRes] = '-'; + } + else + { + seq[i - firstRes] = val; + } + } + + //for(i = 1; i <= lastRes; i++) //- nige + for(i = 1; i <= length; i++) //- nige + { + (*gdeOutFile) << seq[i - 1]; + //if((i % lineLength == 0) || (i == lastRes)) //- nige + if((i % lineLength == 0) || (i == length)) //- nige + { + (*gdeOutFile) << "\n"; + } + } + } + } + if (userParameters->getOutputStructPenalties() == 1 || + userParameters->getOutputStructPenalties() == 2) + { + if (userParameters->getStructPenalties1() != NONE && + userParameters->getUseSS1() == true) + { + (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left + << alignPtr->getSecStructName1() << "\n"; + + vector* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1(); + //for(i = firstRes; i < firstRes + lastRes; i++) //- nige + for(i = firstRes; i <= lastRes; i++) //- nige + { + val = _gapPenaltyMask1->at(i - 1); + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + seq[i - firstRes] = '-'; + } + else + { + seq[i - firstRes] = val; + } + } + + //for(i = 1; i <= lastRes; i++) //- nige + for(i = 1; i <= length; i++) //- nige + { + (*gdeOutFile) << seq[i - 1]; + //if((i % lineLength == 0) || (i == lastRes)) //- nige + if((i % lineLength == 0) || (i == length)) //- nige + { + (*gdeOutFile) << "\n"; + } + } + } + if (userParameters->getStructPenalties2() != NONE && + userParameters->getUseSS2() == true) + { + (*gdeOutFile) << "\"GM_" << setw(alignPtr->getMaxNames()) << left + << alignPtr->getSecStructName2() << "\n"; + + vector* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2(); + //for(i = firstRes; i < firstRes + lastRes; i++) //- nige + for(i = firstRes; i < length; i++) //- nige + { + val = _gapPenaltyMask2->at(i-1); + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + seq[i - firstRes] = '-'; + } + else + { + seq[i - firstRes] = val; + } + } + + //for(i = 1; i <= lastRes; i++) //- nige + for(i = 1; i <= length; i++) //- nige + { + (*gdeOutFile) << seq[i - 1]; + //if((i % lineLength == 0) || (i == lastRes)) //- nige + if((i % lineLength == 0) || (i == length)) //- nige + { + (*gdeOutFile) << "\n"; + } + } + } + } + gdeOutFile->close(); + } + catch(const bad_alloc& e) + { + gdeOutFile->close(); + cerr << "A bad_alloc exception has occured in the gdeOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + gdeOutFile->close(); + cerr << "An exception has occured in the gdeOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + gdeOutFile->close(); + cerr << "An exception has occured in the gdeOut function.\n"; + exit(1); + } +} + +void AlignmentOutput::nbrfOut(Alignment* alignPtr, outputRegion partToOutput) +{ + char residue; + int val; + int i, ii; + int j, slen; + int lineLength; + rangeNum rnum; + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + + try + { + int _maxLength = alignPtr->getMaxAlnLength(); + vector sequence; + sequence.assign(_maxLength + 1, '0'); + const SeqArray* alignment = alignPtr->getSeqArray(); + // decide the line length for this alignment - maximum is LINELENGTH + lineLength = PAGEWIDTH - alignPtr->getMaxNames(); + lineLength = lineLength - lineLength % 10; // round to a multiple of 10 + if (lineLength > LINELENGTH || lineLength <= 0) // Mark, 18-7-07 + { + lineLength = LINELENGTH; + } + + // Get name prefix. DL if DNA, P1 if protein + string namePrefix; + bool _dnaFlag = userParameters->getDNAFlag(); + namePrefix = _dnaFlag ? ">DL;" : ">P1;"; + + //int length = lastRes - firstRes + 1; //- nige + + for(ii = firstSeq; ii <= lastSeq; ii++) + { + i = alignPtr->getOutputIndex(ii - 1); + + (*nbrfOutFile) << namePrefix; + + if (!userParameters->getSeqRange()) + { + (*nbrfOutFile) << alignPtr->getName(i) << "\n" + << alignPtr->getTitle(i) << "\n"; + } + else + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + (*nbrfOutFile) << nameonly(alignPtr->getName(i)) << "/" << rnum.start + << "-"<< rnum.end << "\n" + << alignPtr->getTitle(i) << "\n"; + } + slen = 0; + //for(j = firstRes; j < firstRes + lastRes; j++) //- nige + for(j = firstRes; j <= lastRes; j++) //- nige + { + // NOTE I changed this here!!!!! + if (j <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + residue = '-'; + } + else + { + residue = userParameters->getAminoAcidCode(val); + } + sequence[j - firstRes] = residue; + slen++; + } + for(j = 1; j <= slen; j++) + { + (*nbrfOutFile) << sequence[j - 1]; + if((j % lineLength == 0) || (j == slen)) + { + (*nbrfOutFile) << "\n"; + } + } + (*nbrfOutFile) << "*\n"; + } + nbrfOutFile->close(); + } + catch(const bad_alloc& e) + { + nbrfOutFile->close(); + cerr << "A bad_alloc exception has occured in the nbrfOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + nbrfOutFile->close(); + cerr << "An exception has occured in the nbrfOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + nbrfOutFile->close(); + cerr << "An exception has occured in the nbrfOut function.\n"; + exit(1); + } +} + +/* + * The function clustalOut is used to ouput the Alignment into a clustal format + * file. + */ +void AlignmentOutput::clustalOut(Alignment* alignPtr, outputRegion partToOutput) +{ + int firstRes = partToOutput._firstRes; + int lastRes = partToOutput._lastRes; + int firstSeq = partToOutput._firstSeq; + int lastSeq = partToOutput._lastSeq; + + try + { + vector seq1; + vector seqNo; + vector printSeqNo; + vector ss_mask1, ss_mask2; + const SeqArray* alignment = alignPtr->getSeqArray(); + char temp[MAXLINE]; + char c; + int val; + int ii, lv1, catident1[NUMRES], catident2[NUMRES], ident, chunks; + int i, j, k, l; + int pos, ptr; + int lineLength; + + rangeNum rnum; + string tmpStr = ""; + string sequenceLine = ""; + + + int _numSequences = alignPtr->getNumSeqs(); + // PMcG revert to Clustalw1.83 output format for name width + const int NAMESWIDTH=std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY); + + seqNo.assign(_numSequences + 1, 0); + printSeqNo.assign(_numSequences + 1, 0); + + // check that lastSeq <= _numSequences + if(lastSeq > _numSequences) + { + lastSeq = _numSequences; + } + + for (i = firstSeq; i <= lastSeq; i++) + { + printSeqNo[i] = seqNo[i] = 0; + for(j = 1; j < firstRes; j++) + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + if((val >= 0) || (val <= userParameters->getMaxAA())) + { + seqNo[i]++; + } + } + } + + seq1.assign(alignPtr->getMaxAlnLength() + 1, 0); + // Check if we have secondary structure in file 1 and if we want to output it. + if (userParameters->getStructPenalties1() == SECST && + userParameters->getUseSS1() == true) + { + int lengthSeq1 = alignPtr->getSeqLength(1); + ss_mask1.assign(lengthSeq1 + 10, 0); + vector* _secStructMask1 = alignPtr->getSecStructMask1(); + + for (i = 0; i < lengthSeq1; i++) + { + ss_mask1[i] = _secStructMask1->at(i); + } + printSecStructMask(lengthSeq1, _secStructMask1, &ss_mask1); + + } + + // Check if we have secondary structure in file 2 and if we want to output it. + if (userParameters->getStructPenalties2() == SECST && + userParameters->getUseSS2() == true && + userParameters->getProfile2Empty() == false) + { + // AW added test getProfile2Empty was meant to fix bug + // 141, but only prevents segfault, output still faulty. + // Has to be fixed somewhere else + int indexProfile2FirstSeq = alignPtr->getProfile1NumSeqs() + 1; + int lengthSeqProfile2 = alignPtr->getSeqLength(indexProfile2FirstSeq); + ss_mask2.assign(lengthSeqProfile2 + 10, 0); + vector* _secStructMask2 = alignPtr->getSecStructMask2(); + for (i = 0; i < lengthSeqProfile2; i++) + { + ss_mask2[i] = _secStructMask2->at(i); + } + printSecStructMask(lengthSeqProfile2, _secStructMask2, &ss_mask2); + } + + (*clustalOutFile) << "CLUSTAL "<< userParameters->getRevisionLevel() + << " multiple sequence alignment\n\n"; + + // decide the line length for this alignment - maximum is LINELENGTH + + //PMcG 9-2-2008 make line output same as 1.83 + //lineLength = PAGEWIDTH - alignPtr->getMaxNames(); + lineLength = PAGEWIDTH - NAMESWIDTH; + lineLength = lineLength - lineLength % 10; // round to a multiple of 10 + if (lineLength > LINELENGTH || lineLength <= 0) // Mark 18-7-07 + { + lineLength = LINELENGTH; + } + + int length = lastRes - firstRes + 1; //- nige + + chunks = length / lineLength; //- nige + if(length % lineLength != 0) //- nige + { + ++chunks; + } + //printf("firstRes=%d,lastRes=%d,length=%d,chunks=%d\n", + // firstRes, lastRes, length, chunks); + + // This will loop through each of the blocks. + for(lv1 = 1; lv1 <= chunks; ++lv1) + { + // pos is begining of chunk, ptr is the end of the chunk to be displayed. + pos = ((lv1 - 1) * lineLength) + 1; + ptr = (length < pos + lineLength - 1) ? length : pos + lineLength - 1; //- nige + + (*clustalOutFile) << "\n"; + int _outStructPenalty = userParameters->getOutputStructPenalties(); + + string secStructName1 = alignPtr->getSecStructName1(); // Mark 18-7-07 + + if((int)secStructName1.size() > MAXNAMESTODISPLAY) + { + //secStructName1 = secStructName1.substr(0, MAXNAMESTODISPLAY); + secStructName1 = secStructName1.substr(0, NAMESWIDTH); + } + if (_outStructPenalty == 0 || _outStructPenalty == 2) + { + if (userParameters->getStructPenalties1() == SECST && + userParameters->getUseSS1() == true) + { + for(i = pos; i <= ptr; ++i) + { + // Check if we can access mask position first + if((int)ss_mask1.size() > i + firstRes - 2) + { + val = ss_mask1[i + firstRes - 2]; + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + temp[i - pos] = '-'; + } + else + { + temp[i - pos] = val; + } + } + else + { + break; + } + } + temp[i - pos] = EOS; + + if(userParameters->getSeqRange()) + { + //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY + + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH + + clusSecStructOffset) + << left << secStructName1 + << " " << temp << "\n"; + } + else + { + //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY) + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH) + << left << secStructName1 << " " + << temp << "\n"; + } + } + } + if (_outStructPenalty == 1 || _outStructPenalty == 2) + { + if (userParameters->getStructPenalties1() != NONE && + userParameters->getUseSS1() == true) + { + vector* _gapPenaltyMask1 = alignPtr->getGapPenaltyMask1(); + for(i = pos; i <= ptr; ++i) + { + if((int)_gapPenaltyMask1->size() > i + firstRes - 2) + { + val = _gapPenaltyMask1->at(i + firstRes - 2); + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + temp[i - pos] = '-'; + } + else + { + temp[i - pos] = val; + } + } + else + { + break; + } + } + temp[i - pos] = EOS; + + //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left + (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left + << secStructName1 << " " + << temp << "\n"; + } + } + + + string secStructName2 = alignPtr->getSecStructName2(); // Mark 18-7-07 + //if((int)secStructName2.size() > MAXNAMESTODISPLAY) + if((int)secStructName2.size() > NAMESWIDTH) + { + //secStructName2 = secStructName2.substr(0, MAXNAMESTODISPLAY); + secStructName2 = secStructName2.substr(0, NAMESWIDTH); + } + + if (_outStructPenalty == 0 || _outStructPenalty == 2) + { + if (userParameters->getStructPenalties2() == SECST && + userParameters->getUseSS2() == true) + { + for(i = pos; i <= ptr; ++i) + { + if((int)ss_mask2.size() > i + firstRes - 2) + { + val = ss_mask2[i + firstRes - 2]; + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + temp[i - pos] = '-'; + } + else + { + temp[i - pos] = val; + } + } + else + { + break; + } + } + temp[i - pos] = EOS; + + + if (userParameters->getSeqRange()) + { + //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY + + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH + + clusSecStructOffset) + << left << secStructName2; + (*clustalOutFile) << " " << temp << "\n"; + } + else + { + //(*clustalOutFile) << "!SS_" << setw(MAXNAMESTODISPLAY) + (*clustalOutFile) << "!SS_" << setw(NAMESWIDTH) + << left << secStructName2 << " " + << temp << "\n"; + } + } + } + + if (_outStructPenalty == 1 || _outStructPenalty == 2) + { + if (userParameters->getStructPenalties2() != NONE && + userParameters->getUseSS2() == true) + { + vector* _gapPenaltyMask2 = alignPtr->getGapPenaltyMask2(); + for(i = pos; i <= ptr; ++i) + { + if((int)_gapPenaltyMask2->size() > i + firstRes - 2) + { + val = _gapPenaltyMask2->at(i + firstRes - 2); + if (val == userParameters->getGapPos1() || + val == userParameters->getGapPos2()) + { + temp[i - pos] = '-'; + } + else + { + temp[i - pos] = val; + } + } + else + { + break; + } + } + temp[i - pos] = EOS; + + //(*clustalOutFile) << "!GM_" << setw(MAXNAMESTODISPLAY) << left + (*clustalOutFile) << "!GM_" << setw(NAMESWIDTH) << left + << secStructName2 << " " + << temp << "\n"; + } + } + + for(ii = firstSeq; ii <= lastSeq; ++ii) + { + i = alignPtr->getOutputIndex(ii - 1); + printSeqNo[i] = 0; + for(j = pos; j <= ptr; ++j) + { + if (j + firstRes - 1 <= alignPtr->getSeqLength(i)) + { + //val = alignPtr.getResidue(i, j + firstRes - 1); + val = (*alignment)[i][j + firstRes - 1]; // NOTE june29 + } + else + { + val = -3; + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + seq1[j] = '-'; + } + else + { + seq1[j] = userParameters->getAminoAcidCode(val); + seqNo[i]++; + printSeqNo[i] = 1; + } + } + for(; j <= ptr; ++j) + { + seq1[j]='-'; + } + + sequenceLine = ""; + + for(int index = pos; index < ptr + 1; index++) + { + sequenceLine += seq1[index]; + } + + string seqName = alignPtr->getName(i); + //if((int)seqName.size() > MAXNAMESTODISPLAY) + if((int)seqName.size() > NAMESWIDTH) + { + //seqName = seqName.substr(0, MAXNAMESTODISPLAY); + seqName = seqName.substr(0, NAMESWIDTH); + } + + if (!userParameters->getSeqRange()) + { + // NOTE I made a change here from + 5, to + 6. + //(*clustalOutFile) << setw(alignPtr->getMaxNames() + 6) << left + // << alignPtr->getName(i); + + //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + 6) << left + // << seqName; + // PMcG changed this to revert to behaviour of clustalw1.83 + // + //(*clustalOutFile) << setw(std::max(std::min(MAXNAMESTODISPLAY,alignPtr->getMaxNames()) , MINNAMESTODISPLAY) + 6) + (*clustalOutFile) << setw(NAMESWIDTH + 6) << left << seqName; + + + } + else // Put the sequence range information in! + { + findRangeValues(alignPtr, &rnum, firstRes, lastRes, ii); + string nameOnly = nameonly(seqName); + std::stringstream ss; + std::stringstream ss2; + string rangeStart; + string rangeEnd; + ss << rnum.start; + ss >> rangeStart; + ss2 << rnum.end; + ss2 >> rangeEnd; + tmpStr = nameOnly + "/" + rangeStart + "-" + rangeEnd; + //(*clustalOutFile) << setw(MAXNAMESTODISPLAY + clusSequenceOffset) + (*clustalOutFile) << setw(NAMESWIDTH + clusSequenceOffset) + << left << tmpStr; + } + + (*clustalOutFile) << sequenceLine; + + if (userParameters->getClSeqNumbers() && printSeqNo[i]) + { + (*clustalOutFile) << " " << seqNo[i]; + } + + (*clustalOutFile) << "\n"; + } + + // Now print out the conservation information! + for(i = pos; i <= ptr; ++i) + { + seq1[i] = ' '; + ident = 0; + + for(j = 1; j <= (int)strongGroup.size(); j++) + { + catident1[j - 1] = 0; + } + for(j = 1; j <= (int)weakGroup.size(); j++) + { + catident2[j - 1] = 0; + } + + for(j = firstSeq; j <= lastSeq; ++j) + { + if((i + firstRes - 1 <= alignPtr->getSeqLength(j)) && + (i + firstRes - 1 <= alignPtr->getSeqLength(firstSeq))) + {// NOTE june29 + if(((*alignment)[firstSeq][i + firstRes - 1] >= 0) && + ((*alignment)[firstSeq][i + firstRes - 1] <= + userParameters->getMaxAA())) + { + // Count how many are identical to the first sequence + if((*alignment)[firstSeq][i + firstRes - 1] == + (*alignment)[j][i + firstRes - 1]) + { + ++ident; + } + // Count how many are in the same category. + for(k = 1; k <= (int)strongGroup.size(); k++) + { + for(l = 0; (c = strongGroup[k - 1][l]); l++) + { + int resCode = (*alignment)[j][i + firstRes - 1]; + if(resCode <= userParameters->getMaxAA() + 1) + { + if (userParameters->getAminoAcidCode(resCode) == c) + { + catident1[k - 1]++; + break; + } + } + } + } + for(k = 1; k <= (int)weakGroup.size(); k++) + { + for(l = 0; (c = weakGroup[k - 1][l]); l++) + {//NOTE june29 + int resCode = (*alignment)[j][i + firstRes - 1]; + if(resCode <= userParameters->getMaxAA() + 1) + { + if (userParameters->getAminoAcidCode(resCode) == c) + { + catident2[k - 1]++; + break; + } + } + } + } + } + } + } + + // Now do the conservation part for each block. + if(ident == lastSeq - firstSeq + 1) + { + seq1[i] = '*'; // All residues the same! + } + else if (!userParameters->getDNAFlag()) + { + for(k = 1; k <= (int)strongGroup.size(); k++) + { + if (catident1[k - 1] == lastSeq - firstSeq + 1) + { + seq1[i] = ':'; // All residues member of the same category + break; + } + } + if(seq1[i] == ' ') + { + for(k = 1; k <= (int)weakGroup.size(); k++) + { + if (catident2[k - 1] == lastSeq - firstSeq + 1) + { + seq1[i] = '.'; // All residues member of the same category + break; + } + } + } + } + } + + sequenceLine = ""; + for(int index = pos; index < ptr + 1; index++) + { + sequenceLine += seq1[index]; + } + + //for(k = 0; k < MAXNAMESTODISPLAY + 6; k++) + for(k = 0; k < NAMESWIDTH + 6; k++) + { + (*clustalOutFile) << " "; + } + if(userParameters->getSeqRange()) + { + + (*clustalOutFile) << " "; + } + + (*clustalOutFile) << sequenceLine << "\n"; + } + clustalOutFile->close(); + } + catch(const bad_alloc& e) + { + clustalOutFile->close(); + cerr << "A bad_alloc exception has occured in the clustalOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(VectorOutOfRange e) + { + clustalOutFile->close(); + cerr << "An exception has occured in the clustalOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(exception e) + { + clustalOutFile->close(); + cerr << "An exception has occured in the clustalOut function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + clustalOutFile->close(); + cerr << "An exception has occured in the clustalOut function.\n"; + exit(1); + } +} + +/* + * The function printSecStructMask takes in the 2 mask vectors. It makes changes to + * mask and puts the result in structMask. This is the one that will be printed out. + * NOTE I have had to use (*structMask)[i] syntax. This is not good. But there is no + * way to do random access setting in vectors otherwise. + */ +void AlignmentOutput::printSecStructMask(int prfLength, vector* mask, + vector* structMask) +{ + int i, j; +// calculate the gap penalty mask from the secondary structures + + i = 0; + try + { + while (i < prfLength) + { + if (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') + { + for (j = 0; j < userParameters->getHelixEndMinus(); j++) + { + if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'a' + && mask->at(i+j) != '$')) + { + break; + } + (*structMask)[i+j] = 'a'; + } + i += j; + while (tolower(mask->at(i)) == 'a' || mask->at(i) == '$') + { + if (i >= prfLength) + break; + if (mask->at(i) == '$') + { + (*structMask)[i] = 'A'; + i++; + break; + } + else + (*structMask)[i] = (*mask)[i]; + i++; + } + for (j = 0; j < userParameters->getHelixEndMinus(); j++) + { + if ((i-j-1>=0) && (tolower(mask->at(i-j-1)) == 'a' + || mask->at(i-j-1) == '$')) + { + (*structMask)[i - j - 1] = 'a'; + } + } + } + else if (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') + { + for (j = 0; j < userParameters->getStrandEndMinus(); j++) + { + if (i + j >= prfLength || (tolower(mask->at(i+j)) != 'b' + && mask->at(i+j) != '%')) + { + break; + } + (*structMask)[i+j] = 'b'; + } + i += j; + while (tolower(mask->at(i)) == 'b' || mask->at(i) == '%') + { + if (i >= prfLength) + break; + if (mask->at(i) == '%') + { + (*structMask)[i] = 'B'; + i++; + break; + } + else + { + (*structMask)[i] = mask->at(i); + } + i++; + } + for (j = 0; j < userParameters->getStrandEndMinus(); j++) + { + if ((i - j - 1 >= 0) && (tolower(mask->at(i - j - 1)) == 'b' || + mask->at(i-j-1) == '%')) + { + (*structMask)[i-j-1] = 'b'; + } + } + } + else + { + i++; + } + } + } + catch(const exception& e) + { + // Catch all the exceptions + cerr << "There has been an exception in the function printSecStructMask\n" + << e.what() << "\n"; + exit(1); + } +} + +/* + * The function findRangeValues is used to find the range to be printed out. + * + * + */ +void AlignmentOutput::findRangeValues(Alignment* alignPtr, rangeNum *rnum, int firstRes, + int len, int firstSeq) +{ + assert(rnum); + assert(firstRes > 0); + assert(len > 0); + assert(firstSeq > 0); + + try + { + int val; + int i, ii; + int j, slen; + + char tmpName[FILENAMELEN + 15]; + int iStart = 0; + int iEnd = 0; // to print sequence start-end with names + int found = 0; + int ngaps = 0; + int tmpStart = 0; + int tmpEnd = 0; + int ntermgaps = 0; + int pregaps = 0; + int tmpk = 0; + int isRange = 0; + int formula = 0; + const SeqArray* alignment = alignPtr->getSeqArray(); + + tmpName[0] = '\0'; + slen = 0; + + ii = firstSeq ; + i = alignPtr->getOutputIndex(ii - 1); // NOTE Same as elsewhere! + string name = alignPtr->getName(i); + + if( (sscanf(name.c_str(),"%[^/]/%d-%d",tmpName, &tmpStart, &tmpEnd) == 3)) + { + isRange = 1; + } + + for(tmpk = 1; tmpk < firstRes; tmpk++) + { + // do this irrespective of above sscanf + //val = alignPtr.getResidue(i, tmpk); + val = (*alignment)[i][tmpk]; // NOTE june29 + if ((val < 0) || (val > userParameters->getMaxAA())) + { + //it is gap + pregaps++; + } + } + + for(j = firstRes; j < firstRes + len; j++) + { + if(j > alignPtr->getSeqLength(i)) + { + val = -3; // Cant get it so break out. + } + else + { + //val = alignPtr.getResidue(i, j); + val = (*alignment)[i][j]; // NOTE june29 + } + if((val == -3) || (val == 253)) + { + break; + } + else if((val < 0) || (val > userParameters->getMaxAA())) + { + ngaps++; + } + else + { + found = j; + } + if ( found && (iStart == 0) ) + { + iStart = found; + ntermgaps = ngaps; + } + slen++; + } + if(userParameters->getSeqRange()) + { + cout << "Name : " << alignPtr->getName(i) << " " + << "\n firstRes = "<< firstRes << " " + << " len = " << len << " " + << "\n iStart = " << iStart << " " + << "\n tmpStart = " << tmpStart << " " + << "\n ngaps = " << ngaps << " " + << "\n pregaps = " << pregaps << " "; + if (!isRange) + { + formula = iStart - pregaps; + } + else + { + formula = iStart - pregaps + ( tmpStart == 1 ? 0: tmpStart-1) ; + } + + cout << "\n\nsuggestion iStart - pregaps + tmpStart - ntermgaps = " + << iStart << " - " << pregaps << " + " << tmpStart << " - " << ntermgaps + << " formula " << formula << " "; + } + else + { + cerr << "\n no range found .... strange, iStart = " << iStart; + formula = 1; + } + if (pregaps == firstRes - 1) // all gaps - now the conditions........ + { + formula = tmpStart ; // keep the previous start... + } + formula = (formula <= 0) ? 1: formula; + if (pregaps == 0 && tmpStart == 0) + { + formula = firstRes; + } + iEnd = formula + len - ngaps -1; + rnum->start = formula; + rnum->end = iEnd; + cout << "\n check... " << alignPtr->getName(i) << " " << rnum->start + << " - " << rnum->end; + cout << " Done checking........."; + } + catch(VectorOutOfRange e) + { + cerr << "An exception has occured in the findRangeValues function.\n" + << e.what() << "\n"; + exit(1); + } + catch(...) + { + cerr << "An exception has occured in findRangeValues function\n"; + exit(1); + } +} + +string AlignmentOutput::nameonly(string s) +{ + string tmp; + try + { + int i = 0; + + while (i < (int)s.size()) + { + if(s.at(i) != '/') + { + tmp += s.at(i); + i++; + } + else + { + break; + } + } + } + catch(const exception& e) + { + cerr << "An exception has occured in the function nameonly\n" + << e.what(); + exit(1); + } + return tmp; +} + +int AlignmentOutput::SeqGCGCheckSum(vector* seq, int length) +{ + int i; + long check; + int seqResIndex = 1; + //int seqLength = seq->size(); + + for (i = 0, check = 0; i < length; i++, seqResIndex++) + { + char _toUpperCase = (*seq)[seqResIndex]; + check += ((i % 57) + 1) * toupper(_toUpperCase); + } + + return (check % 10000); +} + +void AlignmentOutput::showAlign() +{ + //FILE *file; + int numLines; + char temp[MAXLINE + 1]; + temp[0] = '\0'; + string fileName; + string answer; + + if(userParameters->getOutputClustal()) + { + fileName = clustalOutName; + } + else if(userParameters->getOutputNbrf()) + { + fileName = nbrfOutName; + } + else if(userParameters->getOutputGCG()) + { + fileName = gcgOutName; + } + else if(userParameters->getOutputPhylip()) + { + fileName = phylipOutName; + } + else if(userParameters->getOutputGde()) + { + fileName = gdeOutName; + } + else if(userParameters->getOutputNexus()) + { + fileName = nexusOutName; + } + else if(userParameters->getOutputFasta()) + { + fileName = fastaOutName; + } + else + { + return; // No file output type! + } + + ifstream _fileIn; + _fileIn.open(fileName.c_str(), ios::in); + _fileIn.seekg(0, std::ios::beg); // start at the beginning + + cout << "\n\n"; + numLines = 0; + + while(_fileIn.getline(temp, MAXLINE + 1, '\n')) + { + //fputs(temp,stdout); + cout << temp << "\n"; + ++numLines; + if(numLines >= PAGE_LEN) + { + cout << "\n"; + utilityObject->getStr(string("Press [RETURN] to continue or X to stop"), answer); + if(toupper(answer[0]) == 'X') + { + _fileIn.close(); + return; + } + else + { + numLines = 0; + } + } + } + _fileIn.close(); + cout << "\n"; + utilityObject->getStr(string("Press [RETURN] to continue"), answer); +} + +} +