/* Implementation of the Wozniak "anti-diagonal" vectorization strategy for Smith-Waterman comparison, Wozniak (1997) Comp. Appl. Biosci. 13:145-150 November, 2004 */ /* Written by Erik Lindahl, Stockholm Bioinformatics Center, 2004. Please send bug reports and/or suggestions to lindahl@sbc.su.se. */ #include #include "defs.h" #include "param.h" #include "dropgsw.h" #ifdef SW_ALTIVEC int smith_waterman_altivec_word(unsigned char * query_sequence, unsigned short * query_profile_word, int query_length, unsigned char * db_sequence, int db_length, unsigned short bias, unsigned short gap_open, unsigned short gap_extend, struct f_struct * f_str) { int i,j,k; unsigned short * p; unsigned short score; unsigned char * p_dbseq; int alphabet_size = f_str->alphabet_size; unsigned short * workspace = (unsigned short *)f_str->workspace; vector unsigned short Fup,Hup1,Hup2,E,F,H,tmp; vector unsigned char perm; vector unsigned short v_maxscore; vector unsigned short v_bias,v_gapopen,v_gapextend; vector unsigned short v_score; vector unsigned short v_score_q1; vector unsigned short v_score_q2; vector unsigned short v_score_q3; vector unsigned short v_score_load; vector unsigned char queue1_to_score = (vector unsigned char)(16,17,2,3,4,5,6,7,8,9,10,11,12,13,14,15); vector unsigned char queue2_to_queue1 = (vector unsigned char)(0,1,18,19,4,5,6,7,8,9,10,11,12,13,14,15); vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,16,16,16,16,21,16,0,16,1,16,2,16,3,16,4); vector unsigned char queue3_with_load = (vector unsigned char)(23,5,6,7,8,25,9,10,11,27,12,13,29,14,31,16); /* Load the bias to all elements of a constant */ v_bias = vec_lde(0,&bias); perm = vec_lvsl(0,&bias); v_bias = vec_perm(v_bias,v_bias,perm); v_bias = vec_splat(v_bias,0); /* Load gap opening penalty to all elements of a constant */ v_gapopen = vec_lde(0,&gap_open); perm = vec_lvsl(0,&gap_open); v_gapopen = vec_perm(v_gapopen,v_gapopen,perm); v_gapopen = vec_splat(v_gapopen,0); /* Load gap extension penalty to all elements of a constant */ v_gapextend = vec_lde(0,&gap_extend); perm = vec_lvsl(0,&gap_extend); v_gapextend = vec_perm(v_gapextend,v_gapextend,perm); v_gapextend = vec_splat(v_gapextend,0); v_maxscore = vec_xor(v_maxscore,v_maxscore); // Zero out the storage vector k = 2*(db_length+7); for(i=0,j=0;ialphabet_size; unsigned char * workspace = (unsigned char *)f_str->workspace; vector unsigned char Fup,Hup1,Hup2,E,F,H,tmp; vector unsigned char perm; vector unsigned char v_maxscore; vector unsigned char v_bias,v_gapopen,v_gapextend; vector unsigned char v_score; vector unsigned char v_score_q1; vector unsigned char v_score_q2; vector unsigned char v_score_q3; vector unsigned char v_score_q4; vector unsigned char v_score_q5; vector unsigned char v_score_load1; vector unsigned char v_score_load2; vector unsigned char v_zero; vector unsigned char queue1_to_score = (vector unsigned char)(16,1,2,3,4,5,6,7,24,9,10,11,12,13,14,15); vector unsigned char queue2_to_queue1 = (vector unsigned char)(16,17,2,3,4,5,6,7,24,25,10,11,12,13,14,15); vector unsigned char queue3_to_queue2 = (vector unsigned char)(16,17,18,3,4,5,6,7,24,25,26,11,12,13,14,15); vector unsigned char queue4_to_queue3 = (vector unsigned char)(16,17,18,19,4,5,6,7,24,25,26,27,12,13,14,15); vector unsigned char queue5_to_queue4 = (vector unsigned char)(16,17,18,19,20,2,3,4,24,25,26,27,28,10,11,12); vector unsigned char queue5_with_load = (vector unsigned char)(19,20,21,5,6,22,7,23,27,28,29,13,14,30,15,31); vector unsigned char merge_score_load = (vector unsigned char)(0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); v_zero = vec_splat_u8(0); /* Load the bias to all elements of a constant */ v_bias = vec_lde(0,&bias); perm = vec_lvsl(0,&bias); v_bias = vec_perm(v_bias,v_bias,perm); v_bias = vec_splat(v_bias,0); /* Load gap opening penalty to all elements of a constant */ v_gapopen = vec_lde(0,&gap_open); perm = vec_lvsl(0,&gap_open); v_gapopen = vec_perm(v_gapopen,v_gapopen,perm); v_gapopen = vec_splat(v_gapopen,0); /* Load gap extension penalty to all elements of a constant */ v_gapextend = vec_lde(0,&gap_extend); perm = vec_lvsl(0,&gap_extend); v_gapextend = vec_perm(v_gapextend,v_gapextend,perm); v_gapextend = vec_splat(v_gapextend,0); v_maxscore = vec_xor(v_maxscore,v_maxscore); // Zero out the storage vector k = (db_length+15); for(i=0,j=0;i