1 /***********************************************
2 * # Copyright 2009-2010. Liu Yongchao
3 * # Contact: Liu Yongchao, School of Computer Engineering,
4 * # Nanyang Technological University.
5 * # Emails: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
7 * # GPL version 3.0 applies.
9 * ************************************************/
24 #include "MSAClusterTree.h"
31 string parametersInputFilename = "";
32 string parametersOutputFilename = "no training";
33 string annotationFilename = "";
35 bool enableVerbose = false;
36 bool enableAnnotation = false;
37 bool enableClustalWOutput = false;
38 bool enableAlignOrder = false;
39 int numConsistencyReps = 2;
40 int numPreTrainingReps = 0;
41 int numIterativeRefinementReps = 100;
45 VF initDistrib(NumMatrixTypes);
46 VF gapOpen(2 * NumInsertStates);
47 VF gapExtend(2 * NumInsertStates);
48 VVF emitPairs(256, VF(256, 1e-10));
49 VF emitSingle(256, 1e-5);
51 string alphabet = alphabetDefault;
53 const int MIN_PRETRAINING_REPS = 0;
54 const int MAX_PRETRAINING_REPS = 20;
55 const int MIN_CONSISTENCY_REPS = 0;
56 const int MAX_CONSISTENCY_REPS = 5;
57 const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
58 const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
60 string posteriorProbsFilename = "";
61 bool allscores = true;
64 int flag_gui = 0; //0: no gui related o/p
65 //1: gui related o/p generated
66 int flag_ppscore = 0; //0: no pp score sequence added to o/p fasta alignment
67 //1: pp score seq added to o/p fasta alignment
69 ///////////////////////////////
70 // global scoring matrix variables
71 //////////////////////////////
72 float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
73 char *aminos, *bases, matrixtype[20] = "gonnet_160";
76 double sub_matrix[26][26];
77 double normalized_matrix[26][26];// add by YE Yongtao
78 int firstread = 0; //this makes sure that matrices are read only once
80 float TEMPERATURE = 5;
82 int prot_nuc = 0; //0=prot, 1=nucleotide
95 char opt; //can be 'P' or 'M'
100 argument_decl argument;
102 extern inline void read_sustitution_matrix(char *fileName);
103 extern void setmatrixtype(int le);
104 extern inline int matrixtype_to_int();
105 extern inline void read_dna_matrix();
106 extern inline void read_vtml_la_matrix();
107 extern void init_arguments();
109 MSA::MSA(int argc, char* argv[]) {
110 //parse program parameters
111 SafeVector<string> sequenceNames = ParseParams(argc, argv);
113 //initialize arguments for partition function
117 //PrintParameters ("Using parameter set:", initDistrib, gapOpen, gapExtend, emitPairs, emitSingle, NULL);
119 //read the input sequences
120 MultiSequence *sequences = new MultiSequence();
122 for (int i = 0; i < (int) sequenceNames.size(); i++) {
123 cerr << "Loading sequence file: " << sequenceNames[i] << endl;
124 sequences->LoadMFA(sequenceNames[i], true);
126 //allocate space for sequence weights
127 this->seqsWeights = new int[sequences->GetNumSequences()];
128 //initilaize parameters for OPENMP
130 if(numThreads <= 0) {
131 numThreads = omp_get_num_procs();
132 cerr << "Automatically detected " << numThreads << " CPU cores" << endl;
134 cerr <<"Enabling OpenMP (with "<<numThreads<<" threads)"<<endl;
136 //set OpenMP to use dynamic number of threads which is equal to the number of processor cores on the host
137 omp_set_num_threads(numThreads);
139 FILE *fi = fopen ("accuracy", "a");
140 fprintf (fi, "%s ", argv[1]);
143 int levelid = ComputeSimilarity (sequences,ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,emitSingle));
145 // now, we can perform the alignments and write them out
146 MultiSequence *alignment = doAlign(sequences,
147 ProbabilisticModel(initDistrib, gapOpen, gapExtend, emitPairs,
148 emitSingle), levelid);
150 //write the alignment results to standard output
151 if (enableClustalWOutput) {
152 alignment->WriteALN(*alignOutFile);
154 alignment->WriteMFA(*alignOutFile);
157 delete[] this->seqsWeights;
163 /*close the output file*/
164 if (alignOutFileName.length() > 0) {
165 ((std::ofstream*) alignOutFile)->close();
168 /////////////////////////////////////////////////////////////////
171 // Prints MSAPROBS parameters to STDERR. If a filename is
172 // specified, then the parameters are also written to the file.
173 /////////////////////////////////////////////////////////////////
175 void MSA::PrintParameters(const char *message, const VF &initDistrib,
176 const VF &gapOpen, const VF &gapExtend, const VVF &emitPairs,
177 const VF &emitSingle, const char *filename) {
179 // print parameters to the screen
180 cerr << message << endl << " initDistrib[] = { ";
181 for (int i = 0; i < NumMatrixTypes; i++)
182 cerr << setprecision(10) << initDistrib[i] << " ";
183 cerr << "}" << endl << " gapOpen[] = { ";
184 for (int i = 0; i < NumInsertStates * 2; i++)
185 cerr << setprecision(10) << gapOpen[i] << " ";
186 cerr << "}" << endl << " gapExtend[] = { ";
187 for (int i = 0; i < NumInsertStates * 2; i++)
188 cerr << setprecision(10) << gapExtend[i] << " ";
189 cerr << "}" << endl << endl;
191 // if a file name is specified
194 // attempt to open the file for writing
195 FILE *file = fopen(filename, "w");
197 cerr << "ERROR: Unable to write parameter file: " << filename
202 // if successful, then write the parameters to the file
203 for (int i = 0; i < NumMatrixTypes; i++)
204 fprintf(file, "%.10f ", initDistrib[i]);
206 for (int i = 0; i < 2 * NumInsertStates; i++)
207 fprintf(file, "%.10f ", gapOpen[i]);
209 for (int i = 0; i < 2 * NumInsertStates; i++)
210 fprintf(file, "%.10f ", gapExtend[i]);
212 fprintf(file, "%s\n", alphabet.c_str());
213 for (int i = 0; i < (int) alphabet.size(); i++) {
214 for (int j = 0; j <= i; j++)
215 fprintf(file, "%.10f ",
216 emitPairs[(unsigned char) alphabet[i]][(unsigned char) alphabet[j]]);
219 for (int i = 0; i < (int) alphabet.size(); i++)
220 fprintf(file, "%.10f ", emitSingle[(unsigned char) alphabet[i]]);
226 /////////////////////////////////////////////////////////////////
229 // First computes all pairwise posterior probability matrices.
230 // Then, computes new parameters if training, or a final
231 // alignment, otherwise.
232 /////////////////////////////////////////////////////////////////
233 extern VF *ComputePostProbs(int a, int b, string seq1, string seq2);
234 MultiSequence* MSA::doAlign(MultiSequence *sequences,
235 const ProbabilisticModel &model, int levelid) {
238 //get the number of sequences
239 const int numSeqs = sequences->GetNumSequences();
240 //create distance matrix
241 VVF distances(numSeqs, VF(numSeqs, 0));
242 //creat sparseMatrices
243 SafeVector<SafeVector<SparseMatrix *> > sparseMatrices(numSeqs,
244 SafeVector<SparseMatrix *>(numSeqs, NULL));
247 //calculate sequence pairs for openmp model
249 numPairs = (numSeqs - 1) * numSeqs / 2;
250 seqsPairs = new SeqsPair[numPairs];
251 for(int a = 0; a < numSeqs; a++) {
252 for(int b = a + 1; b < numSeqs; b++) {
253 seqsPairs[pairIdx].seq1 = a;
254 seqsPairs[pairIdx].seq2 = b;
259 // do all pairwise alignments for posterior probability matrices
261 #pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
262 for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
263 int a= seqsPairs[pairIdx].seq1;
264 int b = seqsPairs[pairIdx].seq2;
267 cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
270 for (int a = 0; a < numSeqs - 1; a++) {
271 for (int b = a + 1; b < numSeqs; b++) {
273 Sequence *seq1 = sequences->GetSequence(a);
274 Sequence *seq2 = sequences->GetSequence(b);
276 //posterior probability matrix
280 //high similarity use global model
282 if(1) posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
285 //low similarity use local model
286 else if(levelid == 1){
287 VF *forward = model.ComputeForwardMatrix(seq1, seq2,false);
289 VF *backward = model.ComputeBackwardMatrix(seq1, seq2,false);
291 posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
296 //extreme low or extreme high similarity use combined model
300 // compute forward and backward probabilities
301 VF *forward = model.ComputeForwardMatrix(seq1, seq2);
303 VF *backward = model.ComputeBackwardMatrix(seq1, seq2);
305 // compute posterior probability matrix from HMM
306 VF *probcons_posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward);
307 assert(probcons_posterior);
312 VF *probalign_posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
313 assert(probalign_posterior);
315 forward = model.ComputeForwardMatrix(seq1, seq2,false);
317 backward = model.ComputeBackwardMatrix(seq1, seq2,false);
319 posterior = model.ComputePosteriorMatrix(seq1, seq2, *forward,*backward, false);
324 //merge probalign + local + probcons
325 VF::iterator ptr1 = probcons_posterior->begin();
326 VF::iterator ptr2 = probalign_posterior->begin();
327 VF::iterator ptr = posterior->begin();
328 for (int i = 0; i <= seq1->GetLength(); i++) {
329 for (int j = 0; j <= seq2->GetLength(); j++) {
333 *ptr = sqrt((v1*v1 + v2*v2 + v3*v3)/3);
339 delete probcons_posterior;
340 delete probalign_posterior;
344 // perform the pairwise sequence alignment
345 pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
346 seq1->GetLength(), seq2->GetLength(), *posterior);
348 //compute expected accuracy
349 distances[a][b] = distances[b][a] = 1.0f - alignment.second
350 / min(seq1->GetLength(), seq2->GetLength());
352 // compute sparse representations
353 sparseMatrices[a][b] = new SparseMatrix(seq1->GetLength(),
354 seq2->GetLength(), *posterior);
355 sparseMatrices[b][a] = NULL;
358 delete alignment.first;
364 //create the guide tree
365 this->tree = new MSAClusterTree(this, distances, numSeqs);
366 this->tree->create();
368 // perform the consistency transformation the desired number of times
369 float* fweights = new float[numSeqs];
370 for (int r = 0; r < numSeqs; r++) {
371 fweights[r] = ((float) seqsWeights[r]) / INT_MULTIPLY;
374 for (int r = 0; r < numConsistencyReps; r++) {
375 SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices =
376 DoRelaxation(fweights, sequences, sparseMatrices);
378 // now replace the old posterior matrices
379 for (int i = 0; i < numSeqs; i++) {
380 for (int j = 0; j < numSeqs; j++) {
381 delete sparseMatrices[i][j];
382 sparseMatrices[i][j] = newSparseMatrices[i][j];
391 //compute the final multiple sequence alignment
392 MultiSequence *finalAlignment = ComputeFinalAlignment(this->tree, sequences,
393 sparseMatrices, model);
396 if (enableAnnotation) {
397 WriteAnnotation(finalAlignment, sparseMatrices);
399 //destroy the guide tree
403 // delete sparse matrices
404 for (int a = 0; a < numSeqs - 1; a++) {
405 for (int b = a + 1; b < numSeqs; b++) {
406 delete sparseMatrices[a][b];
407 delete sparseMatrices[b][a];
411 return finalAlignment;
414 /////////////////////////////////////////////////////////////////
417 // Attempts to parse an integer from the character string given.
418 // Returns true only if no parsing error occurs.
419 /////////////////////////////////////////////////////////////////
421 bool GetInteger(char *data, int *val) {
428 retVal = strtol(data, &endPtr, 0);
429 if (retVal == 0 && (errno != 0 || data == endPtr))
431 if (errno != 0 && (retVal == LONG_MAX || retVal == LONG_MIN))
433 if (retVal < (long) INT_MIN || retVal > (long) INT_MAX)
439 /////////////////////////////////////////////////////////////////
442 // Attempts to parse a float from the character string given.
443 // Returns true only if no parsing error occurs.
444 /////////////////////////////////////////////////////////////////
446 bool GetFloat(char *data, float *val) {
453 retVal = strtod(data, &endPtr);
454 if (retVal == 0 && (errno != 0 || data == endPtr))
456 if (errno != 0 && (retVal >= 1000000.0 || retVal <= -1000000.0))
458 *val = (float) retVal;
462 /////////////////////////////////////////////////////////////////
465 // Read initial distribution, transition, and emission
466 // parameters from a file.
467 /////////////////////////////////////////////////////////////////
469 void MSA::ReadParameters() {
473 emitPairs = VVF(256, VF(256, 1e-10));
474 emitSingle = VF(256, 1e-5);
476 // read initial state distribution and transition parameters
477 if (parametersInputFilename == string("")) {
478 if (NumInsertStates == 1) {
479 for (int i = 0; i < NumMatrixTypes; i++)
480 initDistrib[i] = initDistrib1Default[i];
481 for (int i = 0; i < 2 * NumInsertStates; i++)
482 gapOpen[i] = gapOpen1Default[i];
483 for (int i = 0; i < 2 * NumInsertStates; i++)
484 gapExtend[i] = gapExtend1Default[i];
485 } else if (NumInsertStates == 2) {
486 for (int i = 0; i < NumMatrixTypes; i++)
487 initDistrib[i] = initDistrib2Default[i];
488 for (int i = 0; i < 2 * NumInsertStates; i++)
489 gapOpen[i] = gapOpen2Default[i];
490 for (int i = 0; i < 2 * NumInsertStates; i++)
491 gapExtend[i] = gapExtend2Default[i];
494 << "ERROR: No default initial distribution/parameter settings exist"
495 << endl << " for " << NumInsertStates
496 << " pairs of insert states. Use --paramfile." << endl;
500 alphabet = alphabetDefault;
502 for (int i = 0; i < (int) alphabet.length(); i++) {
503 emitSingle[(unsigned char) tolower(alphabet[i])] =
504 emitSingleDefault[i];
505 emitSingle[(unsigned char) toupper(alphabet[i])] =
506 emitSingleDefault[i];
507 for (int j = 0; j <= i; j++) {
508 emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
509 alphabet[j])] = emitPairsDefault[i][j];
510 emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
511 alphabet[j])] = emitPairsDefault[i][j];
512 emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
513 alphabet[j])] = emitPairsDefault[i][j];
514 emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
515 alphabet[j])] = emitPairsDefault[i][j];
516 emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
517 alphabet[i])] = emitPairsDefault[i][j];
518 emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
519 alphabet[i])] = emitPairsDefault[i][j];
520 emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
521 alphabet[i])] = emitPairsDefault[i][j];
522 emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
523 alphabet[i])] = emitPairsDefault[i][j];
527 data.open(parametersInputFilename.c_str());
529 cerr << "ERROR: Unable to read parameter file: "
530 << parametersInputFilename << endl;
535 for (int i = 0; i < 3; i++) {
536 if (!getline(data, line[i])) {
538 << "ERROR: Unable to read transition parameters from parameter file: "
539 << parametersInputFilename << endl;
546 for (int i = 0; i < NumMatrixTypes; i++)
547 data2 >> initDistrib[i];
550 for (int i = 0; i < 2 * NumInsertStates; i++)
554 for (int i = 0; i < 2 * NumInsertStates; i++)
555 data2 >> gapExtend[i];
557 if (!getline(data, line[0])) {
558 cerr << "ERROR: Unable to read alphabet from scoring matrix file: "
559 << parametersInputFilename << endl;
563 // read alphabet as concatenation of all characters on alphabet line
568 while (data2 >> token)
571 for (int i = 0; i < (int) alphabet.size(); i++) {
572 for (int j = 0; j <= i; j++) {
575 emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) tolower(
577 emitPairs[(unsigned char) tolower(alphabet[i])][(unsigned char) toupper(
579 emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) tolower(
581 emitPairs[(unsigned char) toupper(alphabet[i])][(unsigned char) toupper(
583 emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) tolower(
585 emitPairs[(unsigned char) tolower(alphabet[j])][(unsigned char) toupper(
587 emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) tolower(
589 emitPairs[(unsigned char) toupper(alphabet[j])][(unsigned char) toupper(
594 for (int i = 0; i < (int) alphabet.size(); i++) {
597 emitSingle[(unsigned char) tolower(alphabet[i])] = val;
598 emitSingle[(unsigned char) toupper(alphabet[i])] = val;
604 /////////////////////////////////////////////////////////////////
607 // Parse all command-line options.
608 /////////////////////////////////////////////////////////////////
609 void MSA::printUsage() {
611 << "************************************************************************"
613 << "\tMSAPROBS is a open-source protein multiple sequence alignment algorithm"
615 << "\tbased on pair hidden markov model and partition function postirior"
617 << "\tprobabilities. If any comments or problems, please contact"
619 << "\tLiu Yongchao(liuy0039@ntu.edu.sg or nkcslyc@hotmail.com)"
621 << "*************************************************************************"
622 << endl << "Usage:" << endl
623 << " msaprobs [OPTION]... [infile]..." << endl << endl
624 << "Description:" << endl
625 << " Align sequences in multi-FASTA format" << endl << endl
626 << " -o, --outfile <string>" << endl
627 << " specify the output file name (STDOUT by default)"
628 << endl << " -num_threads <integer>" << endl
629 << " specify the number of threads used, and otherwise detect automatically"
630 << endl << " -clustalw" << endl
631 << " use CLUSTALW output format instead of FASTA format"
632 << endl << endl << " -c, --consistency REPS" << endl
633 << " use " << MIN_CONSISTENCY_REPS << " <= REPS <= "
634 << MAX_CONSISTENCY_REPS << " (default: " << numConsistencyReps
635 << ") passes of consistency transformation" << endl << endl
636 << " -ir, --iterative-refinement REPS" << endl
637 << " use " << MIN_ITERATIVE_REFINEMENT_REPS
638 << " <= REPS <= " << MAX_ITERATIVE_REFINEMENT_REPS << " (default: "
639 << numIterativeRefinementReps << ") passes of iterative-refinement"
640 << endl << endl << " -v, --verbose" << endl
641 << " report progress while aligning (default: "
642 << (enableVerbose ? "on" : "off") << ")" << endl << endl
643 << " -annot FILENAME" << endl
644 << " write annotation for multiple alignment to FILENAME"
645 << endl << endl << " -a, --alignment-order" << endl
646 << " print sequences in alignment order rather than input order (default: "
647 << (enableAlignOrder ? "on" : "off") << ")" << endl
648 << " -version " << endl
649 << " print out version of MSAPROBS " << endl << endl;
651 SafeVector<string> MSA::ParseParams(int argc, char **argv) {
656 SafeVector<string> sequenceNames;
660 for (int i = 1; i < argc; i++) {
661 if (argv[i][0] == '-') {
663 if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "-?")) {
667 } else if (!strcmp(argv[i], "-o")
668 || !strcmp(argv[i], "--outfile")) {
670 alignOutFileName = argv[++i]; //get the file name
672 cerr << "ERROR: String expected for option " << argv[i]
677 } else if (!strcmp (argv[i], "-p") || !strcmp (argv[i], "--paramfile")){
679 parametersInputFilename = string (argv[++i]);
681 cerr << "ERROR: Filename expected for option " << argv[i] << endl;
684 //number of threads used
685 } else if (!strcmp(argv[i], "-p")
686 || !strcmp(argv[i], "-num_threads")) {
688 if (!GetInteger(argv[++i], &tempInt)) {
689 cerr << " ERROR: invalid integer following option "
690 << argv[i - 1] << ": " << argv[i] << endl;
696 numThreads = tempInt;
699 cerr << "ERROR: Integer expected for option " << argv[i]
703 // number of consistency transformations
704 } else if (!strcmp(argv[i], "-c")
705 || !strcmp(argv[i], "--consistency")) {
707 if (!GetInteger(argv[++i], &tempInt)) {
708 cerr << "ERROR: Invalid integer following option "
709 << argv[i - 1] << ": " << argv[i] << endl;
712 if (tempInt < MIN_CONSISTENCY_REPS
713 || tempInt > MAX_CONSISTENCY_REPS) {
714 cerr << "ERROR: For option " << argv[i - 1]
715 << ", integer must be between "
716 << MIN_CONSISTENCY_REPS << " and "
717 << MAX_CONSISTENCY_REPS << "." << endl;
720 numConsistencyReps = tempInt;
724 cerr << "ERROR: Integer expected for option " << argv[i]
730 // number of randomized partitioning iterative refinement passes
731 else if (!strcmp(argv[i], "-ir")
732 || !strcmp(argv[i], "--iterative-refinement")) {
734 if (!GetInteger(argv[++i], &tempInt)) {
735 cerr << "ERROR: Invalid integer following option "
736 << argv[i - 1] << ": " << argv[i] << endl;
739 if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
740 || tempInt > MAX_ITERATIVE_REFINEMENT_REPS) {
741 cerr << "ERROR: For option " << argv[i - 1]
742 << ", integer must be between "
743 << MIN_ITERATIVE_REFINEMENT_REPS << " and "
744 << MAX_ITERATIVE_REFINEMENT_REPS << "."
748 numIterativeRefinementReps = tempInt;
751 cerr << "ERROR: Integer expected for option " << argv[i]
758 else if (!strcmp(argv[i], "-annot")) {
759 enableAnnotation = true;
761 annotationFilename = argv[++i];
763 cerr << "ERROR: FILENAME expected for option " << argv[i]
769 // clustalw output format
770 else if (!strcmp(argv[i], "-clustalw")) {
771 enableClustalWOutput = true;
775 else if (!strcmp(argv[i], "-co") || !strcmp(argv[i], "--cutoff")) {
777 if (!GetFloat(argv[++i], &tempFloat)) {
779 << "ERROR: Invalid floating-point value following option "
780 << argv[i - 1] << ": " << argv[i] << endl;
783 if (tempFloat < 0 || tempFloat > 1) {
784 cerr << "ERROR: For option " << argv[i - 1]
785 << ", floating-point value must be between 0 and 1."
792 cerr << "ERROR: Floating-point value expected for option "
799 else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--verbose")) {
800 enableVerbose = true;
804 else if (!strcmp(argv[i], "-a")
805 || !strcmp(argv[i], "--alignment-order")) {
806 enableAlignOrder = true;
810 else if (!strcmp(argv[i], "-version")) {
811 cerr << "MSAPROBS version " << VERSION << endl;
816 cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
820 sequenceNames.push_back(string(argv[i]));
824 /*check the output file name*/
825 cerr << "-------------------------------------" << endl;
826 if (alignOutFileName.length() == 0) {
827 cerr << "The final alignments will be printed out to STDOUT" << endl;
828 alignOutFile = &std::cout;
830 cerr << "Open the output file " << alignOutFileName << endl;
831 alignOutFile = new ofstream(alignOutFileName.c_str(),
832 ios::binary | ios::out | ios::trunc);
834 cerr << "-------------------------------------" << endl;
835 return sequenceNames;
838 /////////////////////////////////////////////////////////////////
841 // Process the tree recursively. Returns the aligned sequences
842 // corresponding to a node or leaf of the tree.
843 /////////////////////////////////////////////////////////////////
844 MultiSequence* MSA::ProcessTree(TreeNode *tree, MultiSequence *sequences,
845 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
846 const ProbabilisticModel &model) {
848 MultiSequence *result;
850 // check if this is a node of the alignment tree
851 //if (tree->GetSequenceLabel() == -1){
852 if (tree->leaf == NODE) {
853 MultiSequence *alignLeft = ProcessTree(tree->left, sequences,
854 sparseMatrices, model);
855 MultiSequence *alignRight = ProcessTree(tree->right, sequences,
856 sparseMatrices, model);
861 result = AlignAlignments(alignLeft, alignRight, sparseMatrices, model);
868 // otherwise, this is a leaf of the alignment tree
870 result = new MultiSequence();
872 //result->AddSequence (sequences->GetSequence(tree->GetSequenceLabel())->Clone());
873 result->AddSequence(sequences->GetSequence(tree->idx)->Clone());
879 /////////////////////////////////////////////////////////////////
880 // ComputeFinalAlignment()
882 // Compute the final alignment by calling ProcessTree(), then
883 // performing iterative refinement as needed.
884 /////////////////////////////////////////////////////////////////
886 MultiSequence* MSA::ComputeFinalAlignment(MSAGuideTree*tree,
887 MultiSequence *sequences,
888 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
889 const ProbabilisticModel &model) {
890 MultiSequence *alignment = ProcessTree(tree->getRoot(), sequences,
891 sparseMatrices, model);
893 SafeVector<int> oldOrdering;
894 int numSeqs = alignment->GetNumSequences();
895 if (enableAlignOrder) {
896 for (int i = 0; i < numSeqs; i++)
897 oldOrdering.push_back(alignment->GetSequence(i)->GetSortLabel());
898 alignment->SaveOrdering();
899 enableAlignOrder = false;
902 // tree-based refinement
903 // TreeBasedBiPartitioning (sparseMatrices, model, alignment, tree);
905 int numSeqs = alignment->GetNumSequences();
906 //if(numSeqs < numIterativeRefinementReps){
907 for(int iter = 0; iter < 5; iter ++){
908 for(int i = 0; i < numSeqs - 1; i++){
909 DoIterativeRefinementTreeNode(sparseMatrices, model, alignment, i);
913 //DoIterativeRefinement() return 1,2: this refinement unsuccessful
915 int ineffectiveness = 0;
916 for (int i = 0; i < numIterativeRefinementReps; i++){
917 int flag = DoIterativeRefinement(sparseMatrices, model, alignment);
920 if(numIterativeRefinementReps < 20*numSeqs)
921 numIterativeRefinementReps ++;
922 if(flag == 1) ineffectiveness ++;
924 //else ineffectiveness = 0;
925 if(ineffectiveness > 2*numSeqs && i >100 ) break;
930 for (int i = 0; i < numIterativeRefinementReps; i++)
931 DoIterativeRefinement(sparseMatrices, model, alignment);
935 if (oldOrdering.size() > 0) {
936 for (int i = 0; i < (int) oldOrdering.size(); i++) {
937 alignment->GetSequence(i)->SetSortLabel(oldOrdering[i]);
941 // return final alignment
945 /////////////////////////////////////////////////////////////////
948 // Returns the alignment of two MultiSequence objects.
949 /////////////////////////////////////////////////////////////////
951 MultiSequence* MSA::AlignAlignments(MultiSequence *align1,
952 MultiSequence *align2,
953 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
954 const ProbabilisticModel &model) {
956 // print some info about the alignment
958 for (int i = 0; i < align1->GetNumSequences(); i++)
959 cerr << ((i == 0) ? "[" : ",")
960 << align1->GetSequence(i)->GetLabel();
962 for (int i = 0; i < align2->GetNumSequences(); i++)
963 cerr << ((i == 0) ? "[" : ",")
964 << align2->GetSequence(i)->GetLabel();
968 VF *posterior = model.BuildPosterior (align1, align2, sparseMatrices, cutoff);
970 VF *posterior = model.BuildPosterior(getSeqsWeights(), align1, align2,
971 sparseMatrices, cutoff);
973 // compute an "accuracy" measure for the alignment before refinement
975 pair<SafeVector<char> *, float> alignment;
977 alignment = model.ComputeAlignment(align1->GetSequence(0)->GetLength(),
978 align2->GetSequence(0)->GetLength(), *posterior);
984 // compute total length of sequences
986 for (int i = 0; i < align1->GetNumSequences(); i++)
987 for (int j = 0; j < align2->GetNumSequences(); j++)
988 totLength += min(align1->GetSequence(i)->GetLength(),
989 align2->GetSequence(j)->GetLength());
991 // give an "accuracy" measure for the alignment
992 cerr << alignment.second / totLength << endl;
995 // now build final alignment
996 MultiSequence *result = new MultiSequence();
997 for (int i = 0; i < align1->GetNumSequences(); i++)
999 align1->GetSequence(i)->AddGaps(alignment.first, 'X'));
1000 for (int i = 0; i < align2->GetNumSequences(); i++)
1001 result->AddSequence(
1002 align2->GetSequence(i)->AddGaps(alignment.first, 'Y'));
1003 if (!enableAlignOrder)
1004 result->SortByLabel();
1006 // free temporary alignment
1007 delete alignment.first;
1012 /////////////////////////////////////////////////////////////////
1015 // Performs one round of the weighted probabilistic consistency transformation.
1017 /////////////////////////////////////////////////////////////////
1019 SafeVector<SafeVector<SparseMatrix *> > MSA::DoRelaxation(float* seqsWeights,
1020 MultiSequence *sequences,
1021 SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
1022 const int numSeqs = sequences->GetNumSequences();
1024 SafeVector<SafeVector<SparseMatrix *> > newSparseMatrices(numSeqs,
1025 SafeVector<SparseMatrix *>(numSeqs, NULL));
1027 // for every pair of sequences
1030 #pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
1031 for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
1032 int i = seqsPairs[pairIdx].seq1;
1033 int j = seqsPairs[pairIdx].seq2;
1034 float wi = seqsWeights[i];
1035 float wj = seqsWeights[j];
1037 for (int i = 0; i < numSeqs; i++) {
1038 float wi = seqsWeights[i];
1039 for (int j = i + 1; j < numSeqs; j++) {
1040 float wj = seqsWeights[j];
1042 Sequence *seq1 = sequences->GetSequence(i);
1043 Sequence *seq2 = sequences->GetSequence(j);
1045 if (enableVerbose) {
1047 #pragma omp critical
1049 cerr << "Relaxing (" << i + 1 << ") " << seq1->GetHeader()
1050 << " vs. " << "(" << j + 1 << ") " << seq2->GetHeader()
1053 // get the original posterior matrix
1054 VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
1055 assert(posteriorPtr);
1056 VF &posterior = *posteriorPtr;
1058 const int seq1Length = seq1->GetLength();
1059 const int seq2Length = seq2->GetLength();
1061 // contribution from the summation where z = x and z = y
1062 float w = wi * wi * wj + wi * wj * wj;
1064 for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
1065 //posterior[k] = w*posterior[k];
1066 posterior[k] += posterior[k];
1070 cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";
1072 // contribution from all other sequences
1073 for (int k = 0; k < numSeqs; k++) {
1074 if (k != i && k != j) {
1075 float wk = seqsWeights[k];
1076 float w = wi * wj * wk;
1079 Relax1(w, sparseMatrices[k][i], sparseMatrices[k][j],
1081 else if (k > i && k < j)
1082 Relax(w, sparseMatrices[i][k], sparseMatrices[k][j],
1085 SparseMatrix *temp =
1086 sparseMatrices[j][k]->ComputeTranspose();
1087 Relax(w, sparseMatrices[i][k], temp, posterior);
1092 //cerr<<"sumW "<<sumW<<endl;
1093 for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++) {
1094 //posterior[k] /= sumW;
1095 posterior[k] /= numSeqs;
1097 // mask out positions not originally in the posterior matrix
1098 SparseMatrix *matXY = sparseMatrices[i][j];
1099 for (int y = 0; y <= seq2Length; y++)
1101 for (int x = 1; x <= seq1Length; x++) {
1102 SafeVector<PIF>::iterator XYptr = matXY->GetRowPtr(x);
1103 SafeVector<PIF>::iterator XYend = XYptr + matXY->GetRowSize(x);
1104 VF::iterator base = posterior.begin() + x * (seq2Length + 1);
1106 while (XYptr != XYend) {
1108 // zero out all cells until the first filled column
1109 while (curr < XYptr->first) {
1114 // now, skip over this column
1119 // zero out cells after last column
1120 while (curr <= seq2Length) {
1126 // save the new posterior matrix
1127 newSparseMatrices[i][j] = new SparseMatrix(seq1->GetLength(),
1128 seq2->GetLength(), posterior);
1129 newSparseMatrices[j][i] = NULL;
1132 cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";
1134 delete posteriorPtr;
1137 cerr << "done." << endl;
1143 return newSparseMatrices;
1146 /////////////////////////////////////////////////////////////////
1149 // Computes the consistency transformation for a single sequence
1150 // z, and adds the transformed matrix to "posterior".
1151 /////////////////////////////////////////////////////////////////
1153 void MSA::Relax(float weight, SparseMatrix *matXZ, SparseMatrix *matZY,
1159 int lengthX = matXZ->GetSeq1Length();
1160 int lengthY = matZY->GetSeq2Length();
1161 assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());
1164 for (int i = 1; i <= lengthX; i++) {
1165 SafeVector<PIF>::iterator XZptr = matXZ->GetRowPtr(i);
1166 SafeVector<PIF>::iterator XZend = XZptr + matXZ->GetRowSize(i);
1168 VF::iterator base = posterior.begin() + i * (lengthY + 1);
1170 // iterate through all x[i]-z[k]
1171 while (XZptr != XZend) {
1172 SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(XZptr->first);
1173 SafeVector<PIF>::iterator ZYend = ZYptr
1174 + matZY->GetRowSize(XZptr->first);
1175 const float XZval = XZptr->second;
1177 // iterate through all z[k]-y[j]
1178 while (ZYptr != ZYend) {
1179 //base[ZYptr->first] += weight * XZval * ZYptr->second;
1180 base[ZYptr->first] += XZval * ZYptr->second;
1188 /////////////////////////////////////////////////////////////////
1191 // Computes the consistency transformation for a single sequence
1192 // z, and adds the transformed matrix to "posterior".
1193 /////////////////////////////////////////////////////////////////
1195 void MSA::Relax1(float weight, SparseMatrix *matZX, SparseMatrix *matZY,
1201 int lengthZ = matZX->GetSeq1Length();
1202 int lengthY = matZY->GetSeq2Length();
1205 for (int k = 1; k <= lengthZ; k++) {
1206 SafeVector<PIF>::iterator ZXptr = matZX->GetRowPtr(k);
1207 SafeVector<PIF>::iterator ZXend = ZXptr + matZX->GetRowSize(k);
1209 // iterate through all z[k]-x[i]
1210 while (ZXptr != ZXend) {
1211 SafeVector<PIF>::iterator ZYptr = matZY->GetRowPtr(k);
1212 SafeVector<PIF>::iterator ZYend = ZYptr + matZY->GetRowSize(k);
1213 const float ZXval = ZXptr->second;
1214 VF::iterator base = posterior.begin()
1215 + ZXptr->first * (lengthY + 1);
1217 // iterate through all z[k]-y[j]
1218 while (ZYptr != ZYend) {
1219 //base[ZYptr->first] += weight * ZXval * ZYptr->second;
1220 base[ZYptr->first] += ZXval * ZYptr->second;
1227 /////////////////////////////////////////////////////////////////
1228 // DoIterativeRefinement()
1230 // Performs a single round of randomized partionining iterative
1232 // return 0: successful refinement, 1: ineffective refinement, 2: random problem
1233 /////////////////////////////////////////////////////////////////
1234 int MSA::DoIterativeRefinement(
1235 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
1236 const ProbabilisticModel &model, MultiSequence* &alignment) {
1237 set<int> groupOne, groupTwo;
1238 int numSeqs = alignment->GetNumSequences();
1240 // create two separate groups
1241 for (i = 0; i < numSeqs; i++) {
1249 if (groupOne.empty() || groupTwo.empty()) return 2;
1251 // project into the two groups
1252 MultiSequence *groupOneSeqs = alignment->Project(groupOne);
1253 assert(groupOneSeqs);
1254 MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
1255 assert(groupTwoSeqs);
1257 //start add by Yongtao
1259 VF *posterior = model.BuildPosterior (groupOneSeqs, groupTwoSeqs, sparseMatrices, cutoff);
1261 VF *posterior = model.BuildPosterior(getSeqsWeights(), groupOneSeqs, groupTwoSeqs,
1262 sparseMatrices, cutoff);
1264 // compute an "accuracy" measure for the alignment before refinement
1265 SafeVector<SafeVector<char>::iterator> oldOnePtrs(groupOne.size());
1266 SafeVector<SafeVector<char>::iterator> oldTwoPtrs(groupTwo.size());
1268 for (set<int>::const_iterator iter = groupOne.begin();
1269 iter != groupOne.end(); ++iter) {
1270 oldOnePtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
1273 for (set<int>::const_iterator iter = groupTwo.begin();
1274 iter != groupTwo.end(); ++iter) {
1275 oldTwoPtrs[i++] = alignment->GetSequence(*iter)->GetDataPtr();
1278 VF &posteriorArr = *posterior;
1279 int oldLength = alignment->GetSequence(0)->GetLength();
1280 int groupOneindex=0; int groupTwoindex=0;
1281 float accuracy_before = 0;
1283 for (i = 1; i <= oldLength; i++) {
1284 // check to see if there is a gap in every sequence of the set
1285 bool foundOne = false;
1286 for (j = 0; !foundOne && j < (int) groupOne.size(); j++)
1287 foundOne = (oldOnePtrs[j][i] != '-');
1288 // if not, then this column counts towards the sequence length
1289 if (foundOne) groupOneindex ++;
1290 bool foundTwo = false;
1291 for (j = 0; !foundTwo && j < (int) groupTwo.size(); j++)
1292 foundTwo = (oldTwoPtrs[j][i] != '-');
1293 if (foundTwo) groupTwoindex ++;
1294 if(foundOne && foundTwo) accuracy_before +=
1295 posteriorArr[groupOneindex * (groupTwoSeqs->GetSequence(0)->GetLength() + 1) + groupTwoindex];
1298 pair<SafeVector<char> *, float> refinealignment;
1300 refinealignment = model.ComputeAlignment(groupOneSeqs->GetSequence(0)->GetLength(),
1301 groupTwoSeqs->GetSequence(0)->GetLength(), *posterior);
1303 // now build final alignment
1304 MultiSequence *result = new MultiSequence();
1305 for (int i = 0; i < groupOneSeqs->GetNumSequences(); i++)
1306 result->AddSequence(
1307 groupOneSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'X'));
1308 for (int i = 0; i < groupTwoSeqs->GetNumSequences(); i++)
1309 result->AddSequence(
1310 groupTwoSeqs->GetSequence(i)->AddGaps(refinealignment.first, 'Y'));
1311 // free temporary alignment
1312 delete refinealignment.first;
1315 delete groupOneSeqs;
1316 delete groupTwoSeqs;
1317 if(accuracy_before == refinealignment.second) return 1;
1322 void MSA::DoIterativeRefinementTreeNode(
1323 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices,
1324 const ProbabilisticModel &model, MultiSequence* &alignment,
1326 set<int> groupOne, groupTwo;
1327 int numSeqs = alignment->GetNumSequences();
1329 vector<bool> inGroup1;
1330 inGroup1.resize(numSeqs);
1331 for (int i = 0; i < numSeqs; i++) {
1332 inGroup1[i] = false;
1335 AlignmentOrder* orders = this->tree->getAlignOrders();
1336 AlignmentOrder* order = &orders[nodeIndex];
1337 for (int i = 0; i < order->leftNum; i++) {
1338 int si = order->leftLeafs[i];
1339 inGroup1[si] = true;
1341 for (int i = 0; i < order->rightNum; i++) {
1342 int si = order->rightLeafs[i];
1343 inGroup1[si] = true;
1345 // create two separate groups
1346 for (int i = 0; i < numSeqs; i++) {
1353 if (groupOne.empty() || groupTwo.empty())
1356 // project into the two groups
1357 MultiSequence *groupOneSeqs = alignment->Project(groupOne);
1358 assert(groupOneSeqs);
1359 MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
1360 assert(groupTwoSeqs);
1364 alignment = AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices,
1367 delete groupOneSeqs;
1368 delete groupTwoSeqs;
1371 /////////////////////////////////////////////////////////////////
1372 // WriteAnnotation()
1374 // Computes annotation for multiple alignment and write values
1376 /////////////////////////////////////////////////////////////////
1378 void MSA::WriteAnnotation(MultiSequence *alignment,
1379 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
1380 ofstream outfile(annotationFilename.c_str());
1382 if (outfile.fail()) {
1383 cerr << "ERROR: Unable to write annotation file." << endl;
1387 const int alignLength = alignment->GetSequence(0)->GetLength();
1388 const int numSeqs = alignment->GetNumSequences();
1390 SafeVector<int> position(numSeqs, 0);
1391 SafeVector<SafeVector<char>::iterator> seqs(numSeqs);
1392 for (int i = 0; i < numSeqs; i++)
1393 seqs[i] = alignment->GetSequence(i)->GetDataPtr();
1394 SafeVector<pair<int, int> > active;
1395 active.reserve(numSeqs);
1397 SafeVector<int> lab;
1398 for (int i = 0; i < numSeqs; i++)
1399 lab.push_back(alignment->GetSequence(i)->GetSortLabel());
1402 for (int i = 1; i <= alignLength; i++) {
1404 // find all aligned residues in this particular column
1406 for (int j = 0; j < numSeqs; j++) {
1407 if (seqs[j][i] != '-') {
1408 active.push_back(make_pair(lab[j], ++position[j]));
1412 sort(active.begin(), active.end());
1413 outfile << setw(4) << ComputeScore(active, sparseMatrices) << endl;
1419 /////////////////////////////////////////////////////////////////
1422 // Computes the annotation score for a particular column.
1423 /////////////////////////////////////////////////////////////////
1425 int MSA::ComputeScore(const SafeVector<pair<int, int> > &active,
1426 const SafeVector<SafeVector<SparseMatrix *> > &sparseMatrices) {
1428 if (active.size() <= 1)
1431 // ALTERNATIVE #1: Compute the average alignment score.
1434 for (int i = 0; i < (int) active.size(); i++) {
1435 for (int j = i + 1; j < (int) active.size(); j++) {
1436 val += sparseMatrices[active[i].first][active[j].first]->GetValue(
1437 active[i].second, active[j].second);
1441 return (int) (200 * val / ((int) active.size() * ((int) active.size() - 1)));
1445 /////////////////////////////////////////////////////////////////
1446 // ComputeSimilarity ()
1448 // Computes the average similarity for a particular family.
1449 // extreme low or extreme high similarity(<=20% or >80%) return 0
1450 // low similarity(20%-50%) return 1
1451 // high similarity(50%-80%) return 2
1452 /////////////////////////////////////////////////////////////////
1453 extern pair<SafeVector<char> *, float> partViterbi(string seq1, string seq2);
1454 extern float computeS(string seq1, string seq2, SafeVector<char> * alignment);
1456 int MSA::ComputeSimilarity (MultiSequence *sequences,const ProbabilisticModel &model){
1459 //get the number of sequences
1460 const int numSeqs = sequences->GetNumSequences();
1461 //average identity for all sequences
1465 //calculate sequence pairs for openmp model
1467 numPairs = (numSeqs - 1) * numSeqs / 2;
1468 seqsPairs = new SeqsPair[numPairs];
1469 for(int a = 0; a < numSeqs; a++) {
1470 for(int b = a + 1; b < numSeqs; b++) {
1471 seqsPairs[pairIdx].seq1 = a;
1472 seqsPairs[pairIdx].seq2 = b;
1478 // do all pairwise alignments for family similarity
1480 #pragma omp parallel for private(pairIdx) default(shared) schedule(dynamic)
1481 for(pairIdx = 0; pairIdx < numPairs; pairIdx++) {
1482 int a= seqsPairs[pairIdx].seq1;
1483 int b = seqsPairs[pairIdx].seq2;
1485 #pragma omp critical
1486 cerr <<"tid "<<omp_get_thread_num()<<" a "<<a<<" b "<<b<<endl;
1489 for (int a = 0; a < numSeqs - 1; a++) {
1490 for (int b = a + 1; b < numSeqs; b++) {
1492 Sequence *seq1 = sequences->GetSequence(a);
1493 Sequence *seq2 = sequences->GetSequence(b);
1495 //pair<SafeVector<char> *, float> alignment = ::partViterbi(seq1->GetString(),seq2->GetString());
1496 //cerr << alignment.second / alignment.first->size();
1497 //cerr << computeS(seq1->GetString(),seq2->GetString(),alignment.first)<< endl;
1498 pair<SafeVector<char> *, float> alignment = model.ComputeViterbiAlignment(seq1,seq2);
1500 VF* posterior = ::ComputePostProbs(a, b, seq1->GetString(),seq2->GetString());
1501 pair<SafeVector<char> *, float> alignment = model.ComputeAlignment(
1502 seq1->GetLength(), seq2->GetLength(), *posterior);
1505 SafeVector<char>::iterator iter1 = seq1->GetDataPtr();
1506 SafeVector<char>::iterator iter2 = seq2->GetDataPtr();
1508 float N_correct_match = 0;
1510 //float N_column = 0;
1511 //float N_alignment = 0;
1512 int i = 1;int j = 1;
1513 //bool start = false; bool end = false;
1514 for (SafeVector<char>::iterator iter = alignment.first->begin();
1515 iter != alignment.first->end(); ++iter){
1519 //if(i==seq1->GetLength() || j==seq2->GetLength()) end = true;
1520 unsigned char c1 = (unsigned char) iter1[i++];
1521 unsigned char c2 = (unsigned char) iter2[j++];
1522 if(c1==c2) N_correct_match += 1;
1524 else if(*iter == 'X') i++;
1525 else if(*iter == 'Y') j++;
1526 //if(start && !end) N_column += 1;
1529 if(i!= seq1->GetLength()+1 || j!= seq2->GetLength() + 1 ) cerr << "similarity error"<< endl;
1530 identity += N_correct_match / N_alignment;
1533 identity += alignment.second / alignment.first->size();
1534 delete alignment.first;
1539 identity /= numPairs;
1541 FILE *fi = fopen ("accuracy", "a");
1542 fprintf (fi, " %.10f ", identity); fprintf (fi, "\n");
1547 if(identity <= 0.15) initDistrib[2] = 0.143854;
1548 else if(identity <= 0.2) initDistrib[2] = 0.191948;
1549 else if(identity <= 0.25) initDistrib[2] = 0.170705;
1550 else if(identity <= 0.3) initDistrib[2] = 0.100675;
1551 else if(identity <= 0.35) initDistrib[2] = 0.090755;
1552 else if(identity <= 0.4) initDistrib[2] = 0.146188;
1553 else if(identity <= 0.45) initDistrib[2] = 0.167858;
1554 else if(identity <= 0.5) initDistrib[2] = 0.250769;
1555 //else if(identity <= 0.6) initDistrib[2] = 0.500829;
1556 //else if(identity <= 0.7) initDistrib[2] = 0.259622;
1558 if( identity<= 0.25 || identity > 0.8 ) return 0;
1559 else if(identity > 0.2 && identity<= 0.4) return 1;