3 #include "diaglist.h"
\r
7 #define pow4(i) (1 << (2*i)) // 4^i = 2^(2*i)
\r
8 const unsigned K = 7;
\r
9 const unsigned KTUPS = pow4(K);
\r
10 static unsigned TuplePos[KTUPS];
\r
12 static char *TupleToStr(int t)
\r
16 for (int i = 0; i < K; ++i)
\r
18 unsigned Letter = (t/(pow4(i)))%4;
\r
19 assert(Letter >= 0 && Letter < 4);
\r
20 s[K-i-1] = LetterToChar(Letter);
\r
26 static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
\r
30 for (unsigned i = 0; i < K; ++i)
\r
32 const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
\r
33 if (RESIDUE_GROUP_MULTIPLE == uLetter)
\r
41 void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
\r
42 unsigned uLengthY, DiagList &DL)
\r
44 if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
\r
45 Quit("FindDiagsNuc: requires nucleo alphabet");
\r
49 // 16 is arbitrary slop, no principled reason for this.
\r
50 if (uLengthX < K + 16 || uLengthY < K + 16)
\r
53 // Set A to shorter profile, B to longer
\r
59 if (uLengthX < uLengthY)
\r
64 uLengthA = uLengthX;
\r
65 uLengthB = uLengthY;
\r
72 uLengthA = uLengthY;
\r
73 uLengthB = uLengthX;
\r
77 Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
\r
80 // Build tuple map for the longer profile, B
\r
82 Quit("FindDiags: profile too short");
\r
84 memset(TuplePos, EMPTY, sizeof(TuplePos));
\r
86 for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
\r
88 const unsigned uTuple = GetTuple(PB, uPos);
\r
89 if (EMPTY == uTuple)
\r
91 TuplePos[uTuple] = uPos;
\r
95 for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
\r
97 const unsigned uTuple = GetTuple(PA, uPosA);
\r
98 if (EMPTY == uTuple)
\r
100 const unsigned uPosB = TuplePos[uTuple];
\r
101 if (EMPTY == uPosB)
\r
104 // This tuple is found in both profiles
\r
105 unsigned uStartPosA = uPosA;
\r
106 unsigned uStartPosB = uPosB;
\r
108 // Try to extend the match forwards
\r
109 unsigned uEndPosA = uPosA + K - 1;
\r
110 unsigned uEndPosB = uPosB + K - 1;
\r
113 if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
\r
115 const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
\r
116 if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
\r
118 const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
\r
119 if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
\r
121 if (uAAGroupA != uAAGroupB)
\r
130 Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
\r
131 for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
\r
132 Log("%c", LetterToChar(PA[n].m_uResidueGroup));
\r
134 Log(" B %4u-%4u ", uStartPosB, uEndPosB);
\r
135 for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
\r
136 Log("%c", LetterToChar(PB[n].m_uResidueGroup));
\r
141 const unsigned uLength = uEndPosA - uStartPosA + 1;
\r
142 assert(uEndPosB - uStartPosB + 1 == uLength);
\r
144 if (uLength >= g_uMinDiagLength)
\r
147 DL.Add(uStartPosB, uStartPosA, uLength);
\r
149 DL.Add(uStartPosA, uStartPosB, uLength);
\r