X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Ftcoffee%2Ft_coffee_source%2Futil_dp_suboptimal_nw.c;h=c4a09b95853065d4776367c92d24d043adc2c30f;hb=338caee91f39abcaef1ae33c3468bb65f4cc3165;hp=acb4b79a44778252a5f0743c3395729281707014;hpb=535359a3d592ee41bda72e7356f0181f6cee9d07;p=jabaws.git diff --git a/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c b/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c index acb4b79..c4a09b9 100644 --- a/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c +++ b/binaries/src/tcoffee/t_coffee_source/util_dp_suboptimal_nw.c @@ -17,17 +17,12 @@ static float LOG_UNDERFLOW_THRESHOLD = 7.50f; //static float LOG_ZERO = -FLT_MAX; static float LOG_ZERO=-200000004008175468544.000000; static float LOG_ONE = 0.0f; +//DNA Alignment Models +static float DNAinitDistrib2Default[] ={ 0.9588437676f, 0.0205782652f, 0.0205782652f }; +static float DNAgapOpen2Default[] = { 0.0190259293f, 0.0190259293f }; +static float DNAgapExtend2Default[] = { 0.3269913495f, 0.3269913495f }; -//Probabilistic Alignment DNA - - - - -static float DNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f }; -static float DNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f }; -static float DNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f }; - -static char DNAalphabetDefault[] = "ACGUTN"; +static char DNAalphabetDefault[] = "ACGUTN"; static float DNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f}; static float DNAemitPairsDefault[6][6] = { @@ -38,12 +33,25 @@ static float DNAemitPairsDefault[6][6] = { { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f }, { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f } }; +//RNA Alignment Models -//Probabilistic ALignment Blosum62mt - +static float RNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f }; +static float RNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f }; +static float RNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f }; +static char RNAalphabetDefault[] = "ACGUTN"; +static float RNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f}; +static float RNAemitPairsDefault[6][6] = { + { 0.1487240046f, 0.0184142999f, 0.0361397006f, 0.0238473993f, 0.0238473993f, 0.0000375308f }, + { 0.0184142999f, 0.1583919972f, 0.0275536999f, 0.0389291011f, 0.0389291011f, 0.0000815823f }, + { 0.0361397006f, 0.0275536999f, 0.1979320049f, 0.0244289003f, 0.0244289003f, 0.0000824765f }, + { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f }, + { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f }, + { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f } +}; +//Protein Alignment Models static float initDistrib2Default[] = { 0.6814756989f, 8.615339902e-05f, 8.615339902e-05f, 0.1591759622f, 0.1591759622 }; static float gapOpen2Default[] = { 0.0119511066f, 0.0119511066f, 0.008008334786f, 0.008008334786 }; static float gapExtend2Default[] = { 0.3965826333f, 0.3965826333f, 0.8988758326f, 0.8988758326 }; @@ -186,7 +194,7 @@ int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, id=idscore_pairseq(seqI,seqJ,-12, -1, CL->M, "idmat"); - entry=vcalloc ( CL->entry_len, CL->el_size); + entry=vcalloc ( CL->entry_len+1, CL->el_size); entry[SEQ1]=s1;entry[SEQ2]=s2; thres=opt; @@ -203,6 +211,7 @@ int suboptimal_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL, entry[R1]=i;entry[R2]=j; entry[WE]=id; entry[CONS]=1; + add_entry2list (entry,A->CL); } } @@ -603,14 +612,14 @@ void free_proba_pair_wise () } int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) { - int NumMatrixTypes=5; - int NumInsertStates=2; + static int NumMatrixTypes; + static int NumInsertStates; static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, ***TinsProb, *TmatchProb; static int TinsProb_ml, TmatchProb_ml; int i, j,I, J; - float *F, *B, *P; - float tot; - int l, s1, s2; + float *F, *B; + + int l; float thr=0.01;//ProbCons Default char *alphabet; @@ -638,25 +647,92 @@ int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) return 0; } - if (!transMat && (strm (retrieve_seq_type(), "DNA") ||strm (retrieve_seq_type(), "RNA")) ) + if (!transMat && (strm (retrieve_seq_type(), "DNA"))) + { + static float **p; + static float *s; + NumInsertStates=1; + NumMatrixTypes=3; + if (!p) + { + int l,a,b; + l=strlen (DNAalphabetDefault); + p=declare_float (l,l); + s=vcalloc (l, sizeof (float)); + for (a=0; amethod_matrix, DNAalphabetDefault,p,s); + alphabet=RNAalphabetDefault; + emitPairs=declare_float (256, 256); + emitSingle=vcalloc (256, sizeof (float)); + for (i=0; i<256; i++) + { + emitSingle[i]=1e-5; + for (j=0; j<256; j++) + emitPairs[i][j]=1e-10; + } + l=strlen (alphabet); + + for (i=0; imethod_matrix, DNAalphabetDefault,p,s); - alphabet=DNAalphabetDefault; + p=get_emitPairs (CL->method_matrix, RNAalphabetDefault,p,s); + alphabet=RNAalphabetDefault; emitPairs=declare_float (256, 256); emitSingle=vcalloc (256, sizeof (float)); for (i=0; i<256; i++) @@ -669,7 +745,7 @@ int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) for (i=0; iS,s1); NA2=seq2R_template_profile (CL->S,s2); + sst1=seq2T_template_string((CL->S),s1); + sst2=seq2T_template_string((CL->S),s2); + if (NA1 || NA2) { if (NA1) @@ -827,6 +916,8 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f nls[0]=vcalloc (NA1->nseq, sizeof (int)); for (a=0; anseq; a++) nls[0][a]=a; + NA1->seq_al[NA1->nseq]=sst1; + sprintf (NA1->name[NA1->nseq], "sst1"); } else { @@ -842,7 +933,9 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f nns[1]=NA2->nseq; nls[1]=vcalloc (NA2->nseq, sizeof (int)); for (a=0; anseq; a++) - nls[1][a]=a; + nls[1][a]=a; + NA2->seq_al[NA2->nseq]=sst2; + sprintf (NA2->name[NA2->nseq], "sst2"); } else { @@ -858,7 +951,15 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f return 1; } } - + if ( A1!=A2) + { + if (strm (A1->name[A1->nseq], "sst1"))ss1=A1->seq_al[A1->nseq]; + if (strm (A2->name[A2->nseq], "sst2"))ss2=A2->seq_al[A2->nseq]; + uss=(ss1&&ss2)?1:0; + } + else + uss=0; + I=strlen (A1->seq_al[ls[0][0]]); J=strlen (A2->seq_al[ls[1][0]]); @@ -911,7 +1012,7 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f { int in; c1=tolower(A1->seq_al[ls[0][b]][i]); - if ( c1=='-')continue; + if ( c1=='-' || c1=='.' || c1=='~')continue; c1-='a'; if (!(in=observed[c1])){in=observed[c1]=++index;} @@ -954,6 +1055,13 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f if (i==0 || j==0); else { + float sfac; + if (!uss)sfac=1; + else if (ss1[i-1]!=ss2[j-1])sfac=1; + else if (ss1[i-1]==ss2[j-1])sfac=1; + else sfac=1; + + c=0; while (VA1[0][c][i-1]!=-1) { @@ -964,7 +1072,7 @@ int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, f { c2=VA2[0][d][j-1]+'a'; n2=VA2[1][d][j-1]; - TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2; + TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2*sfac; n+=n1*n2; d++; } @@ -1042,7 +1150,8 @@ Constraint_list *ProbaMatrix2CL (Alignment *A, int *ns, int **ls, int NumMatrixT } sort_int_inv (list, 3, 2, 0, list_n-1); - if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size); + if (!entry)entry=vcalloc ( CL->entry_len+1, CL->el_size); + list_n=MIN(list_n,(F*MIN(I,J))); for (i=0; iCL; } -Constraint_list *ProbaMatrix2CL_old (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL) -{ - float totalProb; - int ij, i, j,k, I, J, s1, s2; - static int *entry; - int lib_size=0; - double v; - - I=strlen (A->seq_al[ls[0][0]]); - J=strlen (A->seq_al[ls[1][0]]); - s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100); - s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100); - - totalProb = ComputeTotalProbability (I,J,NumMatrixTypes, NumInsertStates,forward, backward); - if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size); - ij = 0; - thr=0.01; - - for (ij=0,i =0; i <= I; i++) - { - for (j =0; j <= J; j++) - { - v= EXP (MIN(LOG_ONE,(forward[ij] + backward[ij] - totalProb))); - if (i && j && v>=thr) - - { - entry[SEQ1]=s1;entry[SEQ2]=s2; - entry[R1]=i;entry[R2]=j; - entry[WE]=(int)((float)v*(float)NORM_F); - entry[CONS]=1; - add_entry2list (entry,A->CL); - lib_size++; - } - ij += NumMatrixTypes; - } - } - HERE ("LIB_SIZE_OLD: %d", lib_size); - return A->CL; -} float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward) { @@ -1365,7 +1435,7 @@ int ProbabilisticModel (int NumMatrixTypes, int NumInsertStates,float *initDistr int viterbi_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL) { - char C1,c1, C2,c2; + int C1,c1, C2,c2; char *alphabet, *char_buf; char **al, **aln; int seq1Length, seq2Length, I, J; @@ -1624,11 +1694,11 @@ float ** get_emitPairs (char *mat, char *alp, float **p, float *s) return p; } -/*********************************COPYRIGHT NOTICE**********************************/ +/******************************COPYRIGHT NOTICE*******************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ -/*Tue Oct 27 10:12:26 WEST 2009. */ +/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ @@ -1652,4 +1722,4 @@ float ** get_emitPairs (char *mat, char *alp, float **p, float *s) /**/ /**/ /* */ -/*********************************COPYRIGHT NOTICE**********************************/ +/******************************COPYRIGHT NOTICE*******************************/