//static float LOG_ZERO = -FLT_MAX;
static float LOG_ZERO=-200000004008175468544.000000;
static float LOG_ONE = 0.0f;
+//DNA Alignment Models
+static float DNAinitDistrib2Default[] ={ 0.9588437676f, 0.0205782652f, 0.0205782652f };
+static float DNAgapOpen2Default[] = { 0.0190259293f, 0.0190259293f };
+static float DNAgapExtend2Default[] = { 0.3269913495f, 0.3269913495f };
-//Probabilistic Alignment DNA
-
-
-
-
-static float DNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f };
-static float DNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f };
-static float DNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f };
-
-static char DNAalphabetDefault[] = "ACGUTN";
+static char DNAalphabetDefault[] = "ACGUTN";
static float DNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f};
static float DNAemitPairsDefault[6][6] = {
{ 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
{ 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f }
};
+//RNA Alignment Models
-//Probabilistic ALignment Blosum62mt
-
+static float RNAinitDistrib2Default[] = { 0.9615409374f, 0.0000004538f, 0.0000004538f, 0.0192291681f, 0.0192291681f };
+static float RNAgapOpen2Default[] = { 0.0082473317f, 0.0082473317f, 0.0107844425f, 0.0107844425f };
+static float RNAgapExtend2Default[] = { 0.3210460842f, 0.3210460842f, 0.3298229277f, 0.3298229277f };
+static char RNAalphabetDefault[] = "ACGUTN";
+static float RNAemitSingleDefault[6] = {0.2270790040f, 0.2422080040f, 0.2839320004f, 0.2464679927f, 0.2464679927f, 0.0003124650f};
+static float RNAemitPairsDefault[6][6] = {
+ { 0.1487240046f, 0.0184142999f, 0.0361397006f, 0.0238473993f, 0.0238473993f, 0.0000375308f },
+ { 0.0184142999f, 0.1583919972f, 0.0275536999f, 0.0389291011f, 0.0389291011f, 0.0000815823f },
+ { 0.0361397006f, 0.0275536999f, 0.1979320049f, 0.0244289003f, 0.0244289003f, 0.0000824765f },
+ { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
+ { 0.0238473993f, 0.0389291011f, 0.0244289003f, 0.1557479948f, 0.1557479948f, 0.0000743985f },
+ { 0.0000375308f, 0.0000815823f, 0.0000824765f, 0.0000743985f, 0.0000743985f, 0.0000263252f }
+};
+//Protein Alignment Models
static float initDistrib2Default[] = { 0.6814756989f, 8.615339902e-05f, 8.615339902e-05f, 0.1591759622f, 0.1591759622 };
static float gapOpen2Default[] = { 0.0119511066f, 0.0119511066f, 0.008008334786f, 0.008008334786 };
static float gapExtend2Default[] = { 0.3965826333f, 0.3965826333f, 0.8988758326f, 0.8988758326 };
id=idscore_pairseq(seqI,seqJ,-12, -1, CL->M, "idmat");
- entry=vcalloc ( CL->entry_len, CL->el_size);
+ entry=vcalloc ( CL->entry_len+1, CL->el_size);
entry[SEQ1]=s1;entry[SEQ2]=s2;
thres=opt;
entry[R1]=i;entry[R2]=j;
entry[WE]=id;
entry[CONS]=1;
+
add_entry2list (entry,A->CL);
}
}
}
int proba_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
{
- int NumMatrixTypes=5;
- int NumInsertStates=2;
+ static int NumMatrixTypes;
+ static int NumInsertStates;
static float **transMat, **insProb, **matchProb, *initialDistribution, **transProb, **emitPairs, *emitSingle, ***TinsProb, *TmatchProb;
static int TinsProb_ml, TmatchProb_ml;
int i, j,I, J;
- float *F, *B, *P;
- float tot;
- int l, s1, s2;
+ float *F, *B;
+
+ int l;
float thr=0.01;//ProbCons Default
char *alphabet;
return 0;
}
- if (!transMat && (strm (retrieve_seq_type(), "DNA") ||strm (retrieve_seq_type(), "RNA")) )
+ if (!transMat && (strm (retrieve_seq_type(), "DNA")))
+ {
+ static float **p;
+ static float *s;
+ NumInsertStates=1;
+ NumMatrixTypes=3;
+ if (!p)
+ {
+ int l,a,b;
+ l=strlen (DNAalphabetDefault);
+ p=declare_float (l,l);
+ s=vcalloc (l, sizeof (float));
+ for (a=0; a<l; a++)
+ {
+ s[a]=DNAemitSingleDefault[a];
+ for (b=0; b<l; b++)
+ p[a][b]=RNAemitPairsDefault[a][b];
+ }
+ }
+ p=get_emitPairs (CL->method_matrix, DNAalphabetDefault,p,s);
+ alphabet=RNAalphabetDefault;
+ emitPairs=declare_float (256, 256);
+ emitSingle=vcalloc (256, sizeof (float));
+ for (i=0; i<256; i++)
+ {
+ emitSingle[i]=1e-5;
+ for (j=0; j<256; j++)
+ emitPairs[i][j]=1e-10;
+ }
+ l=strlen (alphabet);
+
+ for (i=0; i<l; i++)
+ {
+ int C1,c1, C2,c2;
+ c1=tolower(alphabet[i]);
+ C1=toupper(alphabet[i]);
+ emitSingle[c1]=s[i];
+ emitSingle[C1]=s[i];
+ for (j=0; j<=i; j++)
+ {
+ c2=tolower(alphabet[j]);
+ C2=toupper(alphabet[j]);
+
+ emitPairs[c1][c2]=p[i][j];
+ emitPairs[C1][c2]=p[i][j];
+ emitPairs[C1][C2]=p[i][j];
+ emitPairs[c1][C2]=p[i][j];
+ emitPairs[c2][c1]=p[i][j];
+ emitPairs[C2][c1]=p[i][j];
+ emitPairs[C2][C1]=p[i][j];
+ emitPairs[c2][C1]=p[i][j];
+ }
+ }
+
+
+ transMat=declare_float (2*NumInsertStates+1, 2*NumInsertStates+1);
+ transProb=declare_float (2*NumInsertStates+1,2* NumInsertStates+1);
+ insProb=declare_float (256,NumMatrixTypes);
+ matchProb=declare_float (256, 256);
+ initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
+
+ ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,DNAgapOpen2Default,DNAgapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
+
+ }
+ else if (!transMat && (strm (retrieve_seq_type(), "RNA")))
{
static float **p;
static float *s;
+ NumInsertStates=2;
+ NumMatrixTypes=5;
+
if (!p)
{
int l,a,b;
- l=strlen (DNAalphabetDefault);
+ l=strlen (RNAalphabetDefault);
p=declare_float (l,l);
s=vcalloc (l, sizeof (float));
for (a=0; a<l; a++)
{
- s[a]=DNAemitSingleDefault[a];
+ s[a]=RNAemitSingleDefault[a];
for (b=0; b<l; b++)
- p[a][b]=DNAemitPairsDefault[a][b];
+ p[a][b]=RNAemitPairsDefault[a][b];
}
}
- p=get_emitPairs (CL->method_matrix, DNAalphabetDefault,p,s);
- alphabet=DNAalphabetDefault;
+ p=get_emitPairs (CL->method_matrix, RNAalphabetDefault,p,s);
+ alphabet=RNAalphabetDefault;
emitPairs=declare_float (256, 256);
emitSingle=vcalloc (256, sizeof (float));
for (i=0; i<256; i++)
for (i=0; i<l; i++)
{
- char C1,c1, C2,c2;
+ int C1,c1, C2,c2;
c1=tolower(alphabet[i]);
C1=toupper(alphabet[i]);
emitSingle[c1]=s[i];
matchProb=declare_float (256, 256);
initialDistribution=vcalloc (2*NumMatrixTypes+1, sizeof (float));
- ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,DNAgapOpen2Default,DNAgapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
+ ProbabilisticModel (NumMatrixTypes,NumInsertStates,initDistrib2Default, emitSingle,emitPairs,RNAgapOpen2Default,RNAgapExtend2Default, transMat,initialDistribution,matchProb, insProb,transProb);
}
else if ( !transMat && strm (retrieve_seq_type(), "PROTEIN"))
{
static float **p;
static float *s;
+ NumInsertStates=2;
+ NumMatrixTypes=5;
+
if (!p)
{
int l,a,b;
for (i=0; i<l; i++)
{
- char C1,c1, C2,c2;
+ int C1,c1, C2,c2;
c1=tolower(alphabet[i]);
C1=toupper(alphabet[i]);
emitSingle[c1]=s[i];
int get_tot_prob (Alignment *A1,Alignment *A2, int *ns, int **ls, int nstates, float **matchProb, float **insProb, float *TmatchProb, float ***TinsProb, Constraint_list *CL)
{
int i, j, a, b, c,d, k, n,n1,n2, ij;
- char c1, c2;
+ int c1, c2;
int I, J;
int ***VA1,***VA2, *observed, index;
+ char *ss1=NULL;
+ char *ss2=NULL;
+ int uss=0;
+
//Pre-computation of the pairwise scores in order to use potential profiles
//The profiles are vectorized AND Compressed so that the actual alphabet size (proteins/DNA) does not need to be considered
int s1, s2;
int *nns, **nls;
Alignment *NA1, *NA2;
+ char *sst1;
+ char *sst2;
+
nns=vcalloc ( 2, sizeof (int));
nls=vcalloc (2, sizeof (int*));
NA1=seq2R_template_profile (CL->S,s1);
NA2=seq2R_template_profile (CL->S,s2);
+ sst1=seq2T_template_string((CL->S),s1);
+ sst2=seq2T_template_string((CL->S),s2);
+
if (NA1 || NA2)
{
if (NA1)
nls[0]=vcalloc (NA1->nseq, sizeof (int));
for (a=0; a<NA1->nseq; a++)
nls[0][a]=a;
+ NA1->seq_al[NA1->nseq]=sst1;
+ sprintf (NA1->name[NA1->nseq], "sst1");
}
else
{
nns[1]=NA2->nseq;
nls[1]=vcalloc (NA2->nseq, sizeof (int));
for (a=0; a<NA2->nseq; a++)
- nls[1][a]=a;
+ nls[1][a]=a;
+ NA2->seq_al[NA2->nseq]=sst2;
+ sprintf (NA2->name[NA2->nseq], "sst2");
}
else
{
return 1;
}
}
-
+ if ( A1!=A2)
+ {
+ if (strm (A1->name[A1->nseq], "sst1"))ss1=A1->seq_al[A1->nseq];
+ if (strm (A2->name[A2->nseq], "sst2"))ss2=A2->seq_al[A2->nseq];
+ uss=(ss1&&ss2)?1:0;
+ }
+ else
+ uss=0;
+
I=strlen (A1->seq_al[ls[0][0]]);
J=strlen (A2->seq_al[ls[1][0]]);
{
int in;
c1=tolower(A1->seq_al[ls[0][b]][i]);
- if ( c1=='-')continue;
+ if ( c1=='-' || c1=='.' || c1=='~')continue;
c1-='a';
if (!(in=observed[c1])){in=observed[c1]=++index;}
if (i==0 || j==0);
else
{
+ float sfac;
+ if (!uss)sfac=1;
+ else if (ss1[i-1]!=ss2[j-1])sfac=1;
+ else if (ss1[i-1]==ss2[j-1])sfac=1;
+ else sfac=1;
+
+
c=0;
while (VA1[0][c][i-1]!=-1)
{
{
c2=VA2[0][d][j-1]+'a';
n2=VA2[1][d][j-1];
- TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2;
+ TmatchProb[ij]+=matchProb[c1][c2]*(double)n1*(double)n2*sfac;
n+=n1*n2;
d++;
}
}
sort_int_inv (list, 3, 2, 0, list_n-1);
- if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size);
+ if (!entry)entry=vcalloc ( CL->entry_len+1, CL->el_size);
+
list_n=MIN(list_n,(F*MIN(I,J)));
for (i=0; i<list_n; i++)
{
return A->CL;
}
-Constraint_list *ProbaMatrix2CL_old (Alignment *A, int *ns, int **ls, int NumMatrixTypes, int NumInsertStates, float *forward, float *backward, float thr, Constraint_list *CL)
-{
- float totalProb;
- int ij, i, j,k, I, J, s1, s2;
- static int *entry;
- int lib_size=0;
- double v;
-
- I=strlen (A->seq_al[ls[0][0]]);
- J=strlen (A->seq_al[ls[1][0]]);
- s1=name_is_in_list (A->name[ls[0][0]], (CL->S)->name, (CL->S)->nseq, 100);
- s2=name_is_in_list (A->name[ls[1][0]], (CL->S)->name, (CL->S)->nseq, 100);
-
- totalProb = ComputeTotalProbability (I,J,NumMatrixTypes, NumInsertStates,forward, backward);
- if (!entry)entry=vcalloc ( CL->entry_len, CL->el_size);
- ij = 0;
- thr=0.01;
-
- for (ij=0,i =0; i <= I; i++)
- {
- for (j =0; j <= J; j++)
- {
- v= EXP (MIN(LOG_ONE,(forward[ij] + backward[ij] - totalProb)));
- if (i && j && v>=thr)
-
- {
- entry[SEQ1]=s1;entry[SEQ2]=s2;
- entry[R1]=i;entry[R2]=j;
- entry[WE]=(int)((float)v*(float)NORM_F);
- entry[CONS]=1;
- add_entry2list (entry,A->CL);
- lib_size++;
- }
- ij += NumMatrixTypes;
- }
- }
- HERE ("LIB_SIZE_OLD: %d", lib_size);
- return A->CL;
-}
float ComputeTotalProbability (int seq1Length, int seq2Length,int NumMatrixTypes, int NumInsertStates,float *forward, float *backward)
{
int viterbi_pair_wise ( Alignment *A, int *ns, int **ls, Constraint_list *CL)
{
- char C1,c1, C2,c2;
+ int C1,c1, C2,c2;
char *alphabet, *char_buf;
char **al, **aln;
int seq1Length, seq2Length, I, J;
return p;
}
-/*********************************COPYRIGHT NOTICE**********************************/
+/******************************COPYRIGHT NOTICE*******************************/
/*© Centro de Regulacio Genomica */
/*and */
/*Cedric Notredame */
-/*Tue Oct 27 10:12:26 WEST 2009. */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/**/
/**/
/* */
-/*********************************COPYRIGHT NOTICE**********************************/
+/******************************COPYRIGHT NOTICE*******************************/