********************************************************************/
/*
- * RCS $Id: pair_dist.c 242 2011-05-27 14:04:21Z andreas $
+ * RCS $Id: pair_dist.c 301 2016-06-13 13:32:55Z fabian $
*/
#ifdef HAVE_CONFIG_H
#include "progress.h"
#include "util.h"
+/* Made iend/jend const unsigned long int (originally just int), FS, 2016-04-04
+ */
+
/* Up to rev 173 we had a USE_SYM_KTUPLE switch implemented here. When active
* ktuple distances were computed twice for each pair and averaged. Idea was
static int
SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq,
- int istart, int iend,
- int jstart, int jend,
+ int istart, const unsigned long int iend,
+ int jstart, const unsigned long int jend,
bool use_KimuraCorrection, progress_t *prProgress,
unsigned long int *ulStepNo, unsigned long int ulTotalStepNo);
*/
int
SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq,
- int istart, int iend,
- int jstart, int jend,
+ int istart, const unsigned long int iend,
+ int jstart, const unsigned long int jend,
bool use_kimura, progress_t *prProgress,
unsigned long int *ulStepNo, unsigned long int ulTotalStepNo)
{
*
*/
int
-PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type,
- int istart, int iend,
- int jstart, int jend,
+PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPercID,
+ int istart, const unsigned long int iend,
+ int jstart, const unsigned long int jend,
char *fdist_in, char *fdist_out)
{
int uSeqIndex;
assert(istart<iend);
assert(jstart<jend);
- if (NewSymMatrix(distmat, iend, jend)!=0) {
- Log(&rLog, LOG_FATAL, "%s", "Memory allocation for distance matrix failed");
- }
-
/* compute pairwise distances or read from file
*
#include "random-dist.h"
#else
if (NULL != fdist_in) {
- Log(&rLog, LOG_FATAL, "FIXME: reading of distance matrix from file not implemented");
+ Log(&rLog, LOG_WARN,
+ "Please use distance matrix input only, if you know exactly what you're doing!");
+ if (SymMatrixRead(fdist_in, distmat, mseq)) {
+ Log(&rLog, LOG_FATAL, "%s", "Reading distance matrix failed");
+ }
} else {
+ if (NewSymMatrix(distmat, iend, jend)!=0) {
+ Log(&rLog, LOG_FATAL, "%s", "Memory allocation for distance matrix failed");
+ }
+
/* break into chunks, one for each thread
matrix is a triangle, not a square
hence making even chunk sizes is slightly fiddlier
*/
ulTotalStepNo = iend*jend - iend*iend/2 + iend/2;
-
+
/* FIXME: can get rid of iChunkStart, iChunkEnd now that we're using the arrays */
iChunkStart = iend;
for(iChunk = 0; iChunk <= iNumberOfThreads; iChunk++)
{
iChunkEnd = iChunkStart;
- if(iChunk == iNumberOfThreads - 1)
+ if (iChunk == iNumberOfThreads - 1){
iChunkStart = 0;
- else
+ }
+ else if (iend == jend){
iChunkStart = iend - ((double)(iend - istart) * sqrt(((double)iChunk + 1.0)/(double)iNumberOfThreads));
+ }
+ else {
+ iChunkStart = iend - (iend - istart) * (iChunk + 1) / (double)(iNumberOfThreads);
+ }
iChunkStarts[iChunk] = iChunkStart;
iChunkEnds[iChunk] = iChunkEnd;
+ /*printf("%s:%d: C=%d, ie=%d, is=%d, je=%d, js=%d, Cstart=%d, Cend=%d, diff=%d\n",
+ __FILE__, __LINE__, iChunk, iend, istart, jend, jstart, iChunkStart, iChunkEnd, iChunkEnd-iChunkStart);*/
}
if (PAIRDIST_KTUPLE == pairdist_type) {
Log(&rLog, LOG_INFO, "Calculating pairwise ktuple-distances...");
-
+
NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO),
- "Ktuple-distance calculation progress", bPrintCR);
+ "Ktuple-distance calculation progress", bPrintCR);
#ifdef HAVE_OPENMP
#pragma omp parallel for private(iChunk) schedule(dynamic)
#endif
}
#endif /* random/proper distance calculation */
-
+
/* optional printing of matrix to file
*/
if (NULL != fdist_out) {
names[uSeqIndex] = mseq->sqinfo[uSeqIndex].name;
}
- SymMatrixPrint((*distmat), names, fdist_out);
+ SymMatrixPrint((*distmat), names, fdist_out, bPercID);
Log(&rLog, LOG_INFO, "Pairwise distance matrix written to %s",
fdist_out);
ProgressDone(prProgress);
FreeProgress(&prProgress);
}
-
+
return 0;
}
/*** end: PairDistances() ***/