1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
6 * Copyright (C) 2010 University College Dublin
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This file is part of Clustal-Omega.
15 ********************************************************************/
18 * RCS $Id: symmatrix.c 230 2011-04-09 15:37:50Z andreas $
21 * Functions for symmetric (square) matrices including diagonal.
22 * supports the notion of non-square sub-matrices of a symmetric
23 * matrix, i.e. where |rows|<|cols|.
25 * FIXME Allocating one big chunk of memory is probably
26 * much faster and also easier to maintain.
38 #include "symmatrix.h"
45 #define MAX_BUF_SIZE 65536
48 * @brief Allocates symmat and its members and initialises them. Data
49 * will be calloced, i.e. initialised with zeros.
52 * newly allocated and initialised symmatrix instance
56 * ncols number of columns
58 * @return: non-zero on error
60 * @see FreeSymMatrix()
62 * @note: symmat data will be of fake shape nrows x ncols
66 NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols)
70 assert(nrows>0 && ncols>0 && ncols>=nrows);
71 assert(ncols>0 && ncols>=nrows);
73 (*symmat) = (symmatrix_t *) malloc(1*sizeof(symmatrix_t));
74 if (NULL == (*symmat)) {
75 fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
76 __FILE__, __FUNCTION__);
80 (*symmat)->data = (double **) malloc(nrows * sizeof(double *));
81 if (NULL == (*symmat)->data) {
82 fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
83 __FILE__, __FUNCTION__);
88 for (i=0; i<nrows; i++) {
89 (*symmat)->data[i] = (double *) calloc((ncols-i), sizeof(double));
90 if (NULL == (*symmat)->data[i]) {
91 fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
92 __FILE__, __FUNCTION__);
94 free((*symmat)->data[i]);
96 free((*symmat)->data);
102 fprintf(stderr, "DEBUG(%s|%s():%d) initialising symmat with the number of the beast\n",
103 __FILE__, __FUNCTION__, __LINE__);
106 for (j=0; j<ncols-i; j++) {
107 (*symmat)->data[i][j] = -666.0;
114 (*symmat)->nrows = nrows;
115 (*symmat)->ncols = ncols;
119 /*** end: new_symmatrix ***/
123 * @brief Sets symmat data of given index to given value
126 * symmatrix_t whose data is to be set
132 * value used to set data point
134 * @see SymMatrixGetValue()
136 * @note This is a convenience function that checks index order.
140 SymMatrixSetValue(symmatrix_t *symmat, const int i, const int j, const double value)
142 assert(NULL != symmat);
145 assert(i < symmat->nrows && j < symmat->ncols);
146 symmat->data[i][j-i] = value;
148 assert(j < symmat->nrows && i < symmat->ncols);
149 symmat->data[j][i-j] = value;
152 /*** end: symmatrix_setvalue ***/
157 * @brief Returns element of symmat corresponding to given indices
160 * symmatrix_t of question
166 * @return requested value
168 * @see SymMatrixSetValue()
170 * @note This is a convenience function that checks index order.
173 SymMatrixGetValue(symmatrix_t *symmat, const int i, const int j)
175 assert(NULL != symmat);
178 assert(i < symmat->nrows && j < symmat->ncols);
179 return symmat->data[i][j-i];
181 assert(j < symmat->nrows && i < symmat->ncols);
182 return symmat->data[j][i-j];
185 /*** end: symmatrix_getvalue ***/
192 * @brief Returns a pointer to an element of symmat corresponding to
198 * symmatrix_t of question
204 * @return pointer to value
206 * @see SymMatrixGetValue()
208 * @note This is a convenience function that checks index order.
212 SymMatrixGetValueP(double **val, symmatrix_t *symmat,
213 const int i, const int j)
215 assert(NULL != symmat);
218 assert(i < symmat->nrows && j < symmat->ncols);
219 *val = &(symmat->data[i][j-i]);
221 assert(j < symmat->nrows && i < symmat->ncols);
222 *val = &(symmat->data[j][i-j]);
225 /*** end: symmatrix_getvaluep ***/
229 * @brief Frees memory allocated by data members of symmat and symmat
233 * symmatrix_t to be freed
235 * @note Use in conjunction with NewSymMatrix()
237 * @see NewSymMatrix()
241 FreeSymMatrix(symmatrix_t **symmat)
244 if (NULL != (*symmat)) {
245 if (NULL != (*symmat)->data) {
246 for (i=0; i<(*symmat)->nrows; i++) {
247 free((*symmat)->data[i]);
249 free((*symmat)->data);
255 /*** end: free_symmatrix ***/
260 * @brief Print out a symmat in phylip style. Distances are printed on
261 * one line per sequence/object. Since we also support matrices with
262 * rows\<cols, the first line can also be nrows by ncolumns instead
266 * the symmatrix_t to print
268 * sequence/objects labels/names. must be at least of length symmat nrows
270 * filename or NULL. If NULL stdout will be used.
274 SymMatrixPrint(symmatrix_t *symmat, char **labels, const char *path)
278 int max_label_len = 0;
280 if (NULL==symmat || NULL==labels) {
282 "One of the provided arguments is empty or NULL (print_matrix)\n");
288 if (NULL==(fp=fopen(path, "w"))) {
289 fprintf(stderr, "Couldn't open %s for writing.", path);
294 /* find maximum label length for pretty printing
296 for (i=0; i<symmat->nrows; i++) {
298 assert(NULL != labels[i]);
299 this_len = strlen(labels[i]);
300 if (this_len>max_label_len)
301 max_label_len = this_len;
304 if (symmat->ncols==symmat->nrows) {
305 fprintf(fp, "%u\n", symmat->ncols);
307 /* this is not standard phylip, but necessary to indicate
309 fprintf(fp, "%u x %u\n", symmat->nrows, symmat->ncols);
311 for (i=0; i<symmat->nrows; i++) {
312 /* phylip restriction is 10 characters. we don't care here
314 fprintf(fp, "%-*s", max_label_len, labels[i]);
315 /* we are lazy and do it like fastdist: write all distances
316 * for one seq to one line
318 for (j=0; j<symmat->ncols; j++) {
319 fprintf(fp, " %f", SymMatrixGetValue(symmat, i, j));
321 fprintf(fp, "%s", "\n");
329 /*** end: SymMatrixPrint ***/
335 * @brief Read a distance matrix in phylip format
337 * @param[in] pcFileIn
338 * distance matrix filename
339 * @param[out] prSymMat_p
340 * the symmatrix_t. will be allocated here.
345 * FIXME untested code
348 SymMatrixRead(char *pcFileIn, symmatrix_t **prSymMat_p)
352 /* number of parsed sequences */
354 /* number of parsed distances per sequence */
355 int iNParsedDists = 0;
356 /* total number of sequences/objects */
360 assert(NULL!=pcFileIn);
362 /* FIXME: test correctness and implement label checking */
363 fprintf(stderr, "WARNING: Reading of distance matrix from file not thoroughly tested!\n");
364 fprintf(stderr, "WARNING: Assuming same order of sequences in sequence file and distance matrix file (matching of labels not implemented)\n");
366 if (NULL == (buf = (char *) malloc(MAX_BUF_SIZE * sizeof(char)))) {
367 fprintf(stderr, "ERROR: couldn't allocate memory at %s:%s:%d\n",
368 __FILE__, __FUNCTION__, __LINE__);
372 if (NULL == (prFilePointer = fopen(pcFileIn, "r"))) {
373 fprintf(stderr, "ERROR: Couldn't open %s for reading\n", pcFileIn);
378 /* get number of sequences from first line and allocate memory for
382 if (NULL == fgets(buf, MAX_BUF_SIZE, prFilePointer) ) {
383 fprintf(stderr, "Couldn't read first line from %s\n", pcFileIn);
385 goto closefile_and_freebuf;
387 if (strlen(buf)==MAX_BUF_SIZE-1) {
388 fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)");
390 goto closefile_and_freebuf;
392 if (sscanf(buf, "%d", &iTotalNSeq)!=1) {
393 fprintf(stderr, "ERROR: couldn't parse number of sequences from first line of %s\n", pcFileIn);
395 goto closefile_and_freebuf;
399 Log(&rLog, LOG_FORCED_DEBUG, "iTotalNSeq parsed from %s is %d\n", pcFileIn, iTotalNSeq);
402 if (NewSymMatrix(prSymMat_p, iTotalNSeq, iTotalNSeq)) {
403 fprintf(stderr, "FATAL %s", "Memory allocation for distance matrix failed");
405 goto closefile_and_freebuf;
409 /* parse file line by line
412 while (NULL != fgets(buf, MAX_BUF_SIZE, prFilePointer)) {
416 if (MAX_BUF_SIZE-1 == strlen(buf)) {
417 fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)");
419 goto closefile_and_freebuf;
423 Log(&rLog, LOG_FORCED_DEBUG, "Got line: %s\n", buf);
426 /* new sequence label at beginning of line?
428 if (isblank(buf[0])) {
434 /* tokenise line and treat new sequence specially
436 szToken = strtok(buf, " \t");
441 /* if format is lower dimensional matrix then first
442 * sequence has no distances but might have newline
443 * character at it's end.
445 while (isspace(szToken[strlen(szToken)-1])) {
446 szToken[strlen(szToken)-1]='\0';
448 /* FIXME save label? */
449 szToken = strtok(NULL, " \t");
451 /* from here on it's only parsing of distances */
452 while (szToken != NULL) {
456 /* only parse what's needed */
457 if (iNParsedDists!=iNParsedSeq) {
458 /* parse and store distance
460 if (sscanf(szToken, "%lf", &dist)!=1) {
461 fprintf(stderr, "Couldn't parse float from entry '%s'\n", szToken);
463 goto closefile_and_freebuf;
466 Log(&rLog, LOG_FORCED_DEBUG, "Parsed distance %d for seq %d = %f\n", iNParsedDists-1, iNParsedSeq-1, dist);
468 SymMatrixSetValue(*prSymMat_p, iNParsedSeq-1, iNParsedDists-1, dist);
469 SymMatrixSetValue(*prSymMat_p, iNParsedDists-1, iNParsedSeq-1, dist);
471 szToken = strtok(NULL, " \t");
475 if (iTotalNSeq!=iNParsedSeq) {
476 fprintf(stderr, "expected %d seqs, but only parsed %d\n", iTotalNSeq, iNParsedSeq);
478 goto closefile_and_freebuf;
481 for (i=0; i<iNParsedSeq; i++) {
483 for (j=0; j<iNParsedSeq; j++) {
484 Log(&rLog, LOG_FORCED_DEBUG, "prSymMat_p[%d][%d]=%f\n", i, j, (*prSymMat_p)[i][j]);
489 closefile_and_freebuf:
490 fclose(prFilePointer);
495 /*** end: SymMatrixRead ***/