--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <float.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+
+/* for regex's */
+#include <sys/types.h>
+#include <regex.h>
+
+#include "gjutil.h"
+#include "oc.h"
+
+/*#define VERY_SMALL DBL_MIN*/
+#define VERY_SMALL -DBL_MAX
+#define VERY_BIG DBL_MAX
+#define MAX_ID_LEN 30
+
+/* Postscript plotting constants */
+#define POINT 72
+#define X_OFFSET 40
+#define Y_OFFSET 100
+
+#define DEBUG
+
+float MAXside = 11.0 * POINT; /* Max dimensions of paper */
+float MINside = 8.0 * POINT;
+float WTreeFraction = 0.9; /* Fraction of max taken up by Tree */
+float HTreeFraction = 0.9; /* (Calculated after subtracting offsets) */
+float TreeExtendFac = 0.05; /* Extension of tree beyond min/max value factor */
+double base_val = 1000.0;
+float IdOffsetFactor = 0.01;
+float NumberOffsetFactor = 0.05;
+float NumberFontSize = 12;
+float IdFontSize = 12;
+float TitleFontSize = 12;
+
+/****************************************************************************
+ Program oc:
+
+ General purpose cluster analysis program. Written with flexibility
+ in mind rather than all out efficiency.
+
+ Copyright(c) G. J. Barton (1993,1997,2002)
+
+ Author: G. J. Barton
+ School of Life Sciences
+ University of Dundee
+ Dow St.
+ Dundee DD1 5EH
+ Scotland
+
+ email:geoff@compbio.dundee.ac.uk
+
+ Example input file:
+
+7
+First
+Second
+Third
+Fourth
+Fifth
+Sixth
+Seventh
+100.0
+100.0
+50.0
+33.0
+25.0
+20.0
+100.0
+50.0
+50.0
+33.0
+33.0
+100.0
+33.0
+20.0
+25.0
+33.0
+20.0
+25.0
+100.0
+100.0
+100.0
+
+11/8/93: Add single order option. This forces the clustering to only add
+a single sequence at a time to the growing cluster. All other options are
+equivalent to the full tree method. GJB.
+
+15/12/93: Correct bug in definition of VERY_SMALL ie now = -DBL_MAX. This
+allows the complete and means linkage to work with negative numbers.
+
+04/08/2002:Update affiliation and also add gjnoc2 routine to the distribution.
+----------------------------------------------------------------------------*/
+
+int
+main (int argc, char *argv[])
+{
+ extern FILE *std_in, *std_out, *std_err;
+ extern double base_val;
+ int error;
+ int n;
+ int nclust;
+ int i, j, k, l;
+ int i1loc, j1loc;
+ int i2loc, j2loc;
+ int i3loc, j3loc;
+ double U1val, U2val, U3val, rval;
+ struct sc *clust;
+ double **arr; /* upper diagonal array structure */
+ int *unclust; /* list of unclustered entities */
+ int nunclust; /* number of unclustered entities */
+ char **idents; /* array of idents for the entities */
+ int *notparent; /* list of clusters that have no children */
+ int nnotparent; /* number of clusters that have no children */
+ int *inx; /* array of indexes - inx[i] is the re-ordered position */
+ char *postfile; /* name of PostScript output file */
+
+ /* TIME INFORMATION */
+ clock_t start_time, end_time, initial_time, final_time;
+
+ /* options */
+ int sim; /* similarity scores =1, distances = 2 */
+ int type; /* single linkage = 1, complete linkage = 2, means = 3 */
+ int option; /* use to feed the Grp_Grp function */
+ int ps;
+ int dolog; /* flag to take logs on output to .ps file */
+ double cut;
+ int do_cut;
+ int showid;
+ int amps_out; /* output tree file and order file for amps */
+ int timeclus; /* set to 1 if timing output to std_err required */
+ int single_order;
+
+ /* amps files */
+ char *amps_order_file;
+ char *amps_tree_file;
+ FILE *ford;
+ FILE *ftree;
+ FILE *fp;
+
+ GJinitfile ();
+
+ sim = 1;
+ type = 1;
+ ps = 0; /* default is no PostScript output */
+ showid = 0;
+ do_cut = 0;
+ timeclus = 0;
+ amps_out = 0;
+ dolog = 0;
+ single_order = 0;
+
+ start_time = clock ();
+ initial_time = start_time;
+
+ if (argc > 1)
+ {
+ for (i = 1; i < argc; ++i)
+ {
+ if (strcmp (argv[i], "sim") == 0)
+ {
+ sim = 1;
+ base_val = VERY_BIG;
+ cut = VERY_SMALL;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "dis") == 0)
+ {
+ sim = 2;
+ base_val = VERY_SMALL;
+ cut = VERY_BIG;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "single") == 0)
+ {
+ type = 1;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "complete") == 0)
+ {
+ type = 2;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "means") == 0)
+ {
+ type = 3;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "order") == 0)
+ {
+ /* do a single order clustering rather than a full tree */
+ single_order = 1;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "ps") == 0
+ || strcmp (argv[i], "eps") == 0)
+ {
+ ++i;
+ ps = 1;
+ postfile = GJcat (2, argv[i], ".eps");
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "log") == 0)
+ {
+ dolog = 1;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "cut") == 0)
+ {
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ ++i;
+ cut = atof (argv[i]);
+ do_cut = 1;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "amps") == 0)
+ {
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ amps_out = 1;
+ ++i;
+ if (single_order)
+ {
+ amps_order_file = GJcat (2, argv[i], ".ord");
+ }
+ else
+ {
+ amps_tree_file = GJcat (2, argv[i], ".tree");
+ amps_order_file = GJcat (2, argv[i], ".tord");
+ }
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "id") == 0)
+ {
+ showid = 1; /* output idents rather than index */
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "timeclus") == 0)
+ {
+ timeclus = 1; /* output idents rather than index */
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "maxside") == 0)
+ {
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ ++i;
+ MAXside = atof (argv[i]) * POINT;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else if (strcmp (argv[i], "minside") == 0)
+ {
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ ++i;
+ MINside = atof (argv[i]) * POINT;
+ /* fprintf(std_err,"Option: argv[%d]=%s\n",i,argv[i]); */
+ }
+ else
+ {
+ fprintf (std_err, "Unrecognised Option: argv[%d]=%s\n", i, argv[i]);
+ }
+ }
+ }
+ else
+ {
+ fprintf (std_err, "Cluster analysis program\n\n");
+ fprintf (std_err, "Author: G. J. Barton, University of Dundee, UK\n");
+ fprintf (std_err, " http://www.compbio.dundee.ac.uk\n");
+ fprintf (std_err, "Copyright: G. J. Barton (1993,1997,2002,2004)\n\n");
+ fprintf (std_err, "Please cite: OC a cluster analysis program, Barton, G. J. 1993\n\n");
+ fprintf (std_err, "Usage: oc <sim/dis> <single/complete/means> <eps file> <cut N>\n\n");
+ fprintf (std_err, "Version 2.1a (Apr. 2005) - Requires a file to be piped to standard input\n");
+ fprintf (std_err, "Format: Line 1: Number (N) of entities to cluster (e.g. 10)\n");
+ fprintf (std_err, "Format: Lines 2 to 2+N-1: Identifier codes for the entities (e.g. Entity1)\n");
+ fprintf (std_err, "Format: N*(N-1)/2: Distances, or similarities - ie the upper diagonal\n\n");
+ fprintf (std_err, "Options:\n");
+ fprintf (std_err, "sim = similarity / dis = distances\n");
+ fprintf (std_err, "method = single/complete/means\n");
+ fprintf (std_err, "ps <file> = plot out portrait dendrogram to <file.eps> \n");
+ fprintf (std_err, "eps <file> = plot out portrait dendrogram to <file.eps> \n");
+ fprintf (std_err, "minside <dim> = define length of short (x) side of plot in inches \n");
+ fprintf (std_err, "maxside <dim> = define length of long (y) side of plot in inches \n");
+ fprintf (std_err, "log = take logs before calculation \n");
+ fprintf (std_err, "cut = only show clusters above/below the cutoff\n");
+ fprintf (std_err, "id = output identifier codes rather than indexes for entities\n");
+ fprintf (std_err, "timeclus = output times to generate each cluster\n");
+ fprintf (std_err, "amps <file> = produce amps <file>.tree and <file>.tord files\n\n");
+ fprintf (std_err, "See oc_manual.txt file for brief instructions\n");
+
+ exit (EXIT_SUCCESS);
+ }
+
+ /* Read in the OC file */
+ /* allocate and read in the upper diagonal array */
+ fprintf (std_err, "Reading Upper Diagonal\n");
+ n = read_num (std_in);
+ idents = read_idents (std_in, n);
+ arr = read_up_diag (std_in, n);
+ if (dolog)
+ {
+ up_diag_log (arr, n);
+ }
+
+/* write_up_diag(std_out,arr,n);*/
+ fprintf (std_err, "Read: %d Entries\n", n);
+ /* File has been read in */
+
+ end_time = clock ();
+ fprintf (std_err, "CPU time: %f seconds\n",
+ ((float) (end_time - start_time)) / CLOCKS_PER_SEC);
+ start_time = end_time;
+
+ /* holds index of unclustered entities */
+
+ fprintf (std_err, "Setting up unclust\n");
+ unclust = (int *) GJmalloc (sizeof (int) * n);
+ for (i = 0; i < n; ++i)
+ {
+ unclust[i] = i;
+ }
+ nunclust = n;
+
+ fprintf (std_err, "Setting up notparent\n");
+ notparent = (int *) GJmalloc (sizeof (int) * n);
+ for (i = 0; i < n; ++i)
+ {
+ notparent[i] = 0;
+ }
+ nnotparent = 0;
+
+
+
+ /* set up the clust array to hold results */
+ /* there are always n-1 clusters formed - we will also store the nth (all entities) */
+ fprintf (std_err, "Setting up clust\n");
+ clust = (struct sc *) GJmalloc (sizeof (struct sc) * n);
+ end_time = clock ();
+ fprintf (std_err, "CPU time: %f seconds\n",
+ ((float) (end_time - start_time)) / CLOCKS_PER_SEC);
+ start_time = end_time;
+
+ nclust = 0;
+ U1val = VERY_SMALL;
+ U2val = VERY_SMALL;
+ U3val = VERY_SMALL;
+ if (sim == 1)
+ {
+ if (type == 1)
+ {
+ fprintf (std_err, "Single linkage on similarity\n");
+ option = 1;
+ }
+ else if (type == 2)
+ {
+ fprintf (std_err, "Complete linkage on similarity\n");
+ option = 0;
+ }
+ else if (type == 3)
+ {
+ fprintf (std_err, "Means linkage on similarity\n");
+ option = 2;
+ }
+ }
+ else
+ {
+ if (type == 1)
+ {
+ fprintf (std_err, "Single linkage on distance\n");
+ option = 0;
+ }
+ else if (type == 2)
+ {
+ fprintf (std_err, "Complete linkage on distance\n");
+ option = 1;
+ }
+ else if (type == 3)
+ {
+ fprintf (std_err, "Means linkage on distance\n");
+ option = 2;
+ }
+ }
+
+/* implement max similarity single linkage first */
+
+ fprintf (std_err, "Doing Cluster Analysis...\n");
+ while (nclust < (n - 1))
+ {
+ if (sim == 1)
+ {
+ U1val = VERY_SMALL;
+ U2val = VERY_SMALL;
+ U3val = VERY_SMALL;
+ }
+ else
+ {
+ U1val = VERY_BIG;
+ U2val = VERY_BIG;
+ U3val = VERY_BIG;
+ }
+ if (nunclust > 0)
+ {
+/* fprintf(std_err,"Option 1\n");*/
+ if (!single_order || (single_order && nclust == 0))
+ {
+ /* we've not clustered all entities into groups */
+ /* find max of what is left */
+ for (i = 0; i < nunclust - 1; ++i)
+ {
+ for (j = (i + 1); j < nunclust; ++j)
+ {
+ rval = val_A_B (arr, n, unclust[i], unclust[j]);
+ if (sim == 1)
+ {
+ if (rval > U1val)
+ {
+ U1val = rval;
+ i1loc = unclust[i];
+ j1loc = unclust[j];
+ }
+ }
+ else
+ {
+ if (rval < U1val)
+ {
+ U1val = rval;
+ i1loc = unclust[i];
+ j1loc = unclust[j];
+ }
+ }
+ }
+ } /* ... for(i=0... */
+ } /* ...if(!single_order... */
+ }
+ if (nnotparent > 0 && nunclust > 0)
+ {
+/* fprintf(std_err,"Option 2\n");*/
+ /* we have some clusters */
+ /* so compare them to the unclustered entitites */
+ for (i = 0; i < nunclust; ++i)
+ {
+ for (j = 0; j < nnotparent; ++j)
+ {
+ k = notparent[j];
+ rval =
+ val_Grp_B (arr, n, clust[k].members, clust[k].n,
+ unclust[i], option);
+ if (sim == 1)
+ {
+ if (rval >= U2val)
+ {
+ /* need >= because want biggest previous cluster */
+ U2val = rval;
+ i2loc = unclust[i];
+ j2loc = k;
+ }
+ }
+ else
+ {
+ if (rval <= U2val)
+ {
+ /* need <= because want biggest previous cluster */
+ U2val = rval;
+ i2loc = unclust[i];
+ j2loc = k;
+ }
+ }
+ }
+ }
+ }
+ if (nnotparent > 1)
+ {
+/* fprintf(std_err,"Option 3\n");*/
+ /* compare clusters to clusters if there are more than 1 */
+ for (i = 0; i < nnotparent - 1; ++i)
+ {
+ k = notparent[i];
+ for (j = (i + 1); j < nnotparent; ++j)
+ {
+ l = notparent[j];
+ rval = val_Grp_Grp (arr, n, clust[k].members, clust[k].n,
+ clust[l].members, clust[l].n, option);
+ if (sim == 1)
+ {
+ if (rval >= U3val)
+ {
+ U3val = rval;
+ i3loc = k;
+ j3loc = l;
+ } /* if(rval >= U3val... */
+ }
+ else
+ {
+ if (rval <= U3val)
+ {
+ U3val = rval;
+ i3loc = k;
+ j3loc = l;
+ } /* if(rval >= U3val... */
+ }
+ } /* if j=(i+1) ... */
+ } /* for(i=0... */
+ }
+ /* find which search gave the biggest (or smallest) value */
+ if ((sim == 1 && U3val >= U1val && U3val >= U2val) ||
+ (sim == 2 && U3val <= U1val && U3val <= U2val))
+ {
+ /* joining of two clusters */
+ clust[nclust].score = U3val;
+ clust[nclust].n = clust[i3loc].n + clust[j3loc].n;
+ clust[nclust].members =
+ (int *) GJmalloc (sizeof (int) * clust[nclust].n);
+ for (i = 0; i < clust[i3loc].n; ++i)
+ {
+ clust[nclust].members[i] = clust[i3loc].members[i];
+ }
+ for (i = 0; i < clust[j3loc].n; ++i)
+ {
+ clust[nclust].members[i + clust[i3loc].n] =
+ clust[j3loc].members[i];
+ }
+ clust[nclust].parentA = &clust[i3loc];
+ clust[nclust].parentB = &clust[j3loc];
+ clust[nclust].lab = 1;
+ remove_notparent (notparent, &nnotparent, i3loc);
+ sub_notparent (notparent, &nnotparent, j3loc, nclust);
+ }
+ else if ((sim == 1 && U2val >= U1val && U2val >= U3val) ||
+ (sim == 2 && U2val <= U1val && U2val <= U3val))
+ {
+ /* joining of a single entity to an existing cluster */
+ clust[nclust].score = U2val;
+ clust[nclust].n = 1 + clust[j2loc].n;
+ clust[nclust].members =
+ (int *) GJmalloc (sizeof (int) * clust[nclust].n);
+ /* copy previous cluster list into this one - not strictly neccessary */
+ /* but done to simplify cluster comparisons above */
+ for (i = 1; i < clust[nclust].n; ++i)
+ {
+ clust[nclust].members[i] = clust[j2loc].members[i - 1];
+ }
+ /* stick solitary cluster member on the beginning */
+ clust[nclust].members[0] = i2loc;
+ /* put in parent pointers */
+ clust[nclust].parentA = make_entity (i2loc, base_val);
+ clust[nclust].parentB = &clust[j2loc];
+ clust[nclust].lab = 1;
+ sub_notparent (notparent, &nnotparent, j2loc, nclust);
+ remove_unclust (unclust, &nunclust, i2loc);
+ }
+ else if ((sim == 1 && U1val >= U2val && U1val >= U3val) ||
+ (sim == 2 && U1val <= U2val && U1val <= U3val))
+ {
+ /* unclustered pair were biggest */
+ clust[nclust].score = U1val;
+ clust[nclust].n = 2;
+ clust[nclust].members =
+ (int *) GJmalloc (sizeof (int) * clust[nclust].n);
+ clust[nclust].members[0] = i1loc;
+ clust[nclust].members[1] = j1loc;
+ clust[nclust].parentA = make_entity (i1loc, base_val);
+ clust[nclust].parentB = make_entity (j1loc, base_val);
+ clust[nclust].lab = 1;
+ add_notparent (notparent, &nnotparent, nclust);
+ remove_unclust (unclust, &nunclust, i1loc);
+ remove_unclust (unclust, &nunclust, j1loc);
+ }
+
+/* fprintf(std_err,"Current cluster\n");*/
+/* show_entity(&clust[nclust],std_err);*/
+
+ if (timeclus)
+ {
+ fprintf (std_err, "Cluster: %d DONE! ", nclust);
+ end_time = clock ();
+ fprintf (std_err, "CPU time: %f seconds\n",
+ ((float) (end_time - start_time)) / CLOCKS_PER_SEC);
+ start_time = end_time;
+ }
+
+ ++nclust;
+ }
+ /* now we should have all clusters stored in the clust array */
+ /* set up the inx array */
+ inx = (int *) GJmalloc (sizeof (int) * n);
+ for (i = 0; i < clust[nclust - 1].n; ++i)
+ {
+ inx[clust[nclust - 1].members[i]] = i;
+ }
+
+ if (do_cut)
+ {
+ /* only output the biggest clusters that are above the cutoff */
+ if (sim == 1)
+ {
+ for (i = (nclust - 1); i >= 0; --i)
+ {
+ if (clust[i].lab == 1)
+ {
+ /* only output clusters that aren't already output as part of something bigger */
+ if (clust[i].score >= cut)
+ {
+ fprintf (std_out, "## %d %g %d\n", i, clust[i].score,
+ clust[i].n);
+ show_id_entity (&clust[i], idents, std_out);
+ mark_parents (&clust[i]);
+ }
+ }
+ }
+ }
+ else
+ {
+ for (i = (nclust - 1); i >= 0; --i)
+ {
+ if (clust[i].lab == 1)
+ {
+ /* only output clusters that aren't already output as part of something bigger */
+ if (clust[i].score <= cut)
+ {
+ fprintf (std_out, "## %d %g %d\n", i, clust[i].score,
+ clust[i].n);
+ show_id_entity (&clust[i], idents, std_out);
+ mark_parents (&clust[i]);
+ }
+ }
+ }
+ }
+ /* now output the unclustered entities */
+ fprintf (std_out, "\nUNCLUSTERED ENTITIES\n");
+ write_unclustered (&clust[nclust - 1], idents, std_out);
+
+ }
+ else
+ {
+ /* output all clusters from smallest to largest */
+ if (amps_out)
+ {
+/* fprintf(std_err,"amps_out not implemented yet\n");*/
+ if (!single_order)
+ {
+ fprintf (std_err,
+ "Opening AMPS order and tree files for writing\n");
+ ford = GJfopen (amps_order_file, "w", 1);
+ ftree = GJfopen (amps_tree_file, "w", 1);
+ }
+ else
+ {
+ fprintf (std_err, "Opening AMPS order file for writing\n");
+ ford = GJfopen (amps_order_file, "w", 1);
+ }
+ /* output order file */
+ if (single_order)
+ {
+ /* order file must be reversed */
+ for (i = nclust; i > -1; --i)
+ {
+ fprintf (ford, "%11d%20.2f%10d\n",
+ clust[nclust - 1].members[i] + 1, 0.0, 0);
+ }
+ }
+ else
+ {
+ /* order file for use with the tree */
+ for (i = 0; i < (nclust + 1); ++i)
+ {
+ fprintf (ford, "%11d%20.2f%10d\n",
+ clust[nclust - 1].members[i] + 1, 0.0, 0);
+ }
+ }
+ GJfclose (ford, 1);
+ if (!single_order)
+ {
+ /* output the tree_file */
+ for (i = 0; i < nclust; ++i)
+ {
+ print_amps_cluster (clust[i].parentA, inx, ftree);
+ print_amps_cluster (clust[i].parentB, inx, ftree);
+ }
+ GJfclose (ftree, 1);
+ }
+ }
+ else
+ {
+ for (i = 0; i < nclust; ++i)
+ {
+ fprintf (std_out, "## %d %g %d\n", i, clust[i].score,
+ clust[i].n);
+ if (showid)
+ {
+ show_id_entity (&clust[i], idents, std_out);
+ }
+ else
+ {
+ show_entity (&clust[i], std_out);
+ }
+
+ /* show_inx_entity(&clust[i],inx,std_out); */
+
+ /* fprintf(std_out,"Parents:\n");
+ show_entity(clust[i].parentA,std_out);
+ show_entity(clust[i].parentB,std_out);
+ */
+ }
+ }
+ }
+
+ if (ps == 1)
+ {
+ fp = GJfopen (postfile, "w", 1);
+ draw_dendrogram (fp, clust, idents, inx, nclust, sim);
+ }
+
+ final_time = clock ();
+ fprintf (std_err, "Total CPU time: %f seconds\n",
+ (float) (final_time - initial_time) / CLOCKS_PER_SEC);
+ exit (EXIT_SUCCESS);
+
+}
+
+/*********************************************************************
+up_diag_log: take logs for the upper diagonal structure
+-------------------------------------------------------------------*/
+
+void
+up_diag_log (double **arr, int n)
+{
+ int i, j;
+
+ for (i = 0; i < (n - 1); ++i)
+ {
+ for (j = 0; j < (n - i - 1); ++j)
+ {
+ arr[i][j] = log (arr[i][j]);
+ }
+ }
+}
+
+/**********************************************************************
+write_up_diag:
+writes out n, the number of entities, followed by
+n*(n-1)/2 numbers that are the pair distances or similarities between
+the n entities.
+
+-----------------------------------------------------------------------*/
+void
+write_up_diag (FILE * infile, double **ret_val, int n)
+{
+ int i, j;
+
+ fprintf (infile, "Number of Entities: %d\n", n);
+ for (i = 0; i < (n - 1); ++i)
+ {
+ for (j = 0; j < (n - i - 1); ++j)
+ {
+ fprintf (infile, " %f", ret_val[i][j]);
+ }
+ fprintf (infile, "\n");
+ }
+}
+
+/**********************************************************************
+show_entity:
+
+print out the score, number of members and the members for
+the entity
+----------------------------------------------------------------------*/
+void
+show_entity (struct sc *entity, FILE * out)
+{
+ int i;
+
+ if (entity == NULL)
+ {
+ fprintf (out, "No entity\n");
+ return;
+ }
+
+ fprintf (out, "Entity Score: %g Number of members: %d\n",
+ entity->score, entity->n);
+
+ for (i = 0; i < entity->n; ++i)
+ {
+ fprintf (out, " %d", entity->members[i]);
+ }
+ fprintf (out, "\n");
+}
+
+/**********************************************************************
+show_id_entity:
+
+print out the score, number of members and the members for
+the entity using the id codes rather than numerical positions
+----------------------------------------------------------------------*/
+void
+show_id_entity (struct sc *entity, char **id, FILE * out)
+{
+ int i;
+
+ if (entity == NULL)
+ {
+ fprintf (out, "No entity\n");
+ return;
+ }
+
+/* fprintf(out,"Entity Score: %f Number of members: %d\n",
+ entity->score,entity->n);
+*/
+
+ for (i = 0; i < entity->n; ++i)
+ {
+ fprintf (out, " %s", id[entity->members[i]]);
+ }
+ fprintf (out, "\n");
+}
+
+/**********************************************************************
+show_inx_entity:
+
+print out the score, number of members and the members for
+the entity - followed by the index for the entity
+----------------------------------------------------------------------*/
+void
+show_inx_entity (struct sc *entity, int *inx, FILE * out)
+{
+ int i;
+
+ if (entity == NULL)
+ {
+ fprintf (out, "No entity\n");
+ return;
+ }
+
+ fprintf (out, "Entity Score: %g Number of members: %d\n",
+ entity->score, entity->n);
+ for (i = 0; i < entity->n; ++i)
+ {
+ fprintf (out, " %d", entity->members[i]);
+ }
+ fprintf (out, "\n");
+ for (i = 0; i < entity->n; ++i)
+ {
+ fprintf (out, " %d", inx[entity->members[i]]);
+ }
+ fprintf (out, "\n");
+}
+
+/**********************************************************************
+print_amps_cluster:
+
+print out the cluster in AMPS tree file format.
+ie 1x,20i5 Fortran format.
+
+inx is necessary to so that the identifying codes are re-ordered
+appropriately for AMPS
+
+Need +1 cos AMPS uses sequence numbering from 1...N not 0...N-1
+
+----------------------------------------------------------------------*/
+void
+print_amps_cluster (struct sc *entity, int *inx, FILE * fp)
+{
+ int i;
+ int j;
+
+ fprintf (fp, " %5d\n", entity->n);
+
+ j = 0;
+ for (i = 0; i < entity->n; ++i)
+ {
+ if (j == 20)
+ {
+ j = 0;
+ }
+ if (j == 0)
+ {
+ fprintf (fp, " %5d", inx[entity->members[i]] + 1);
+ }
+ else
+ {
+ fprintf (fp, "%5d", inx[entity->members[i]] + 1);
+ }
+ if (j == 19)
+ {
+ fprintf (fp, "\n");
+ }
+ ++j;
+ }
+ /* change from < to <= to avoid blunders at end of line */
+ if (j <= 19)
+ fprintf (fp, "\n");
+}
+
+/******************************************************************
+make_entity: return pointer to malloc'd space for a struct sc
+ structure for a single entity. i.
+ base_val: The value form minimum distance, or maximum similarity.
+-------------------------------------------------------------------*/
+struct sc *
+make_entity (int i, double base_val)
+{
+ struct sc *ret_val;
+ ret_val = (struct sc *) GJmalloc (sizeof (struct sc));
+ ret_val->score = base_val;
+ ret_val->n = 1;
+ ret_val->members = (int *) GJmalloc (sizeof (int));
+ ret_val->members[0] = i;
+ ret_val->parentA = NULL;
+ ret_val->parentB = NULL;
+ ret_val->lab = 1;
+ return ret_val;
+}
+
+/******************************************************************
+val_A_B: Given the upper diagonal array, return the value stored
+ in the notional arr[i][j].
+ Where 0 <= i < n-1
+ and i < j < n
+
+Note: This could/should be turned into a macro - without the bounds
+ check.
+------------------------------------------------------------------
+
+double val_A_B(
+ double **arr,
+ int n,
+ int i,
+ int j)
+
+{
+ extern FILE *std_err;
+ if(!(i >= 0 && i < n-1 && i < j && j < n)){
+ GJerror("Invalid request in val_A_B");
+ fprintf(std_err,"i,j,n %d %d %d \n",i,j,n);
+ exit(-1);
+ }
+
+ return arr[i][j-i-1];
+}
+*/
+/*******************************************************************
+val_Grp_B: Given upper diagonal array,
+ find min, max, or mean of comparison between the
+ entities in grp and j
+ choice = 0 = find min value of grp-grp comparison
+ 1 = find max ...
+ 2 = find mean ...
+
+Note: there is some duplication in the code to avoid testing choice
+ on every step in the loop
+--------------------------------------------------------------------*/
+double
+val_Grp_B (double **arr, /* upper diagonal array */
+ int n, /* side of array */
+ int *grpA, /* array containing entities of group */
+ int ng, /* number of members in grpA */
+ int j, /* entity to compare to grpA */
+ int choice /* see above */
+ )
+{
+ int k;
+ double val;
+ double rval;
+
+ if (choice == 0)
+ {
+ /* min case */
+ val = VERY_BIG;
+ for (k = 0; k < ng; ++k)
+ {
+ if (grpA[k] < j)
+ {
+ rval = val_A_B (arr, n, grpA[k], j);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, j, grpA[k]);
+ }
+ if (rval < val)
+ val = rval;
+ }
+ }
+ else if (choice == 1)
+ {
+ val = VERY_SMALL;
+ for (k = 0; k < ng; ++k)
+ {
+ if (grpA[k] < j)
+ {
+ rval = val_A_B (arr, n, grpA[k], j);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, j, grpA[k]);
+ }
+ if (rval > val)
+ val = rval;
+ }
+ }
+ else if (choice == 2)
+ {
+ val = 0.0;
+ for (k = 0; k < ng; ++k)
+ {
+ if (grpA[k] < j)
+ {
+ rval = val_A_B (arr, n, grpA[k], j);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, j, grpA[k]);
+ }
+ val += rval;
+ }
+ val /= ng;
+ }
+ return val;
+}
+
+/*******************************************************************
+val_Grp_Grp: Given upper diagonal array,
+ find min, max, or mean of comparison between the
+ entities in grpA and grpB
+ choice = 0 = find min value of grp-grp comparison
+ 1 = find max ...
+ 2 = find mean ...
+
+Note: there is some duplication in the code to avoid testing choice
+ on every step in the loop
+--------------------------------------------------------------------*/
+double
+val_Grp_Grp (double **arr, /* upper diagonal array */
+ int n, /* side of array */
+ int *grpA, /* array containing entities of group */
+ int ngA, /* number of members in grpA */
+ int *grpB, /* array containing entities of group B */
+ int ngB, /* number of members in grpB */
+ int choice /* see above */
+ )
+{
+ int k, l;
+ double val;
+ double rval;
+
+ if (choice == 0)
+ {
+ /* min case */
+ val = VERY_BIG;
+ for (k = 0; k < ngA; ++k)
+ {
+ for (l = 0; l < ngB; ++l)
+ {
+ if (grpA[k] < grpB[l])
+ {
+ rval = val_A_B (arr, n, grpA[k], grpB[l]);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, grpB[l], grpA[k]);
+ }
+ if (rval < val)
+ val = rval;
+ }
+ }
+ }
+ else if (choice == 1)
+ {
+ val = VERY_SMALL;
+ for (k = 0; k < ngA; ++k)
+ {
+ for (l = 0; l < ngB; ++l)
+ {
+ if (grpA[k] < grpB[l])
+ {
+ rval = val_A_B (arr, n, grpA[k], grpB[l]);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, grpB[l], grpA[k]);
+ }
+ if (rval > val)
+ val = rval;
+ }
+ }
+ }
+ else if (choice == 2)
+ {
+ val = 0.0;
+ for (k = 0; k < ngA; ++k)
+ {
+ for (l = 0; l < ngB; ++l)
+ {
+ if (grpA[k] < grpB[l])
+ {
+ rval = val_A_B (arr, n, grpA[k], grpB[l]);
+ }
+ else
+ {
+ rval = val_A_B (arr, n, grpB[l], grpA[k]);
+ }
+ val += rval;
+ }
+ }
+ val /= (ngA * ngB);
+ }
+ return val;
+}
+
+/*****************************************************************
+remove_unclust:
+
+Take the array containing integers showing members that have
+yet to be clustered and remove an element. nunclust is returned
+reduced by one.
+-----------------------------------------------------------------*/
+void
+remove_unclust (int *unclust, int *nunclust, int val)
+{
+ int i, j, found;
+ i = 0;
+ found = 0;
+
+ while (i < *nunclust)
+ {
+ if (unclust[i] == val)
+ {
+ ++i;
+ found = 1;
+ }
+ if (found)
+ {
+ j = i - 1;
+ }
+ else
+ {
+ j = i;
+ }
+ unclust[j] = unclust[i];
+ ++i;
+ }
+ --(*nunclust);
+}
+
+void
+iprintarr (int *arr, int n, FILE * out)
+{
+ int i;
+ for (i = 0; i < n; ++i)
+ {
+ fprintf (out, " %d", arr[i]);
+ }
+ fprintf (out, "\n");
+}
+
+/******************************************************************
+no_share: return 1 if grpA and grpB have no common members
+ 0 if there are common members
+----------------------------------------------------------------*/
+
+int
+no_share (int *grpA, int ngA, int *grpB, int ngB)
+{
+ int i, j;
+
+ for (i = 0; i < ngA; ++i)
+ {
+ for (j = 0; j < ngB; ++j)
+ {
+ if (grpA[i] == grpB[j])
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*****************************************************************
+sub_notparent: given the notparent list substitutes A by B
+-----------------------------------------------------------------*/
+void
+sub_notparent (int *np, int *n, int A, int B)
+{
+ int i;
+ for (i = 0; i < *n; ++i)
+ {
+ if (np[i] == A)
+ {
+ np[i] = B;
+ return;
+ }
+ }
+}
+
+/*****************************************************************
+remove_notparent: given the notparent list removes A from the list
+
+-----------------------------------------------------------------*/
+void
+remove_notparent (int *np, int *n, int A)
+{
+ remove_unclust (np, n, A);
+}
+
+/*****************************************************************
+add_notparent: given the notparent list adds A to the list
+
+-----------------------------------------------------------------*/
+void
+add_notparent (int *np, int *n, int A)
+{
+ np[*n] = A;
+ ++(*n);
+}
+
+/**************************************************************************
+Return a pointer to space for an upper diagonal square array stored rowwise
+G. J. B. 16 May 1993
+
+double precision array.
+side = n;
+--------------------------------------------------------------------------*/
+double **
+GJDudarr (int n)
+{
+ double **ret_val;
+ int i, nm1;
+
+ nm1 = n - 1;
+ ret_val = (double **) GJmalloc (sizeof (double *) * nm1);
+
+
+ for (i = 0; i < nm1; ++i)
+ {
+ ret_val[i] = (double *) GJmalloc (sizeof (double) * (nm1 - i));
+ }
+
+ return ret_val;
+}
+
+/***************************************************************************
+draw_dendrogram:
+
+Output the dendrogram to fout using PostScript commands
+
+---------------------------------------------------------------------------*/
+void
+draw_dendrogram (FILE * fout,
+ struct sc *clust, char **idents, int *inx, int n, int sim)
+{
+ extern double base_val;
+ extern float MINside, MAXside;
+ extern float WTreeFraction, HTreeFraction, TreeExtendFac;
+
+ float Twidth, Theight; /* x and y limits for the tree part of the plot */
+ float Xoffset, Yoffset; /* x and y offset (points) */
+
+ int i;
+ double x1, x2, x3, y1, y2;
+ double Xrange, Yrange, Xmin, Xmax, Ymin, Ymax; /*,TXmax; */
+/* double Xscale,Yscale;*/
+ double Xid, Yid;
+
+ char *buff;
+
+ buff = GJstrcreate (MAX_ID_LEN, NULL);
+
+ Xoffset = X_OFFSET;
+ Yoffset = Y_OFFSET;
+
+ Twidth = (MINside * WTreeFraction) - Xoffset;
+ Theight = (MAXside * HTreeFraction) - Yoffset;
+
+ Xmin = VERY_BIG;
+ Xmax = VERY_SMALL;
+ Ymin = VERY_BIG;
+ Ymax = VERY_SMALL;
+
+ /* find the min and max in X excluding the base_val */
+ for (i = 0; i < n; ++i)
+ {
+ if (sim == 1)
+ {
+ if (clust[i].score < Xmin)
+ Xmin = clust[i].score;
+ if (clust[i].score != base_val && clust[i].score > Xmax)
+ Xmax = clust[i].score;
+ }
+ else
+ {
+ if (clust[i].score != base_val && clust[i].score < Xmin)
+ Xmin = clust[i].score;
+ if (clust[i].score > Xmax)
+ Xmax = clust[i].score;
+ }
+ }
+ /* set the max/min value slightly bigger/smaller than the actual max/min */
+
+ if (sim == 1)
+ Xmax *= (1.0 + TreeExtendFac);
+ if (sim == 2)
+ Xmin *= (1.0 - TreeExtendFac);
+ Xrange = Xmax - Xmin;
+ Ymin = 0.0;
+ Ymax = n;
+ Yrange = (double) Ymax - Ymin;
+
+ /* write out the preamble */
+
+ fprintf (fout, "%%!PS-Adobe-2.0 EPSF-2.0\n");
+ fprintf (fout, "%%%%Title: oc output file\n");
+ fprintf (fout,
+ "%%%%Creator: oc by G. J. Barton: http://www.compbio.dundee.ac.uk\n");
+ /* fprintf(fout,"%%%%Pages: 1\n"); */
+ fprintf (fout, "%%%%DocumentFonts: Times-Roman\n");
+ fprintf (fout, "%%%%BoundingBox: %d %d %d %d\n", (int) Xoffset,
+ (int) Yoffset, (int) MINside, (int) MAXside);
+ fprintf (fout, "%%%%EndComments\n");
+
+ fprintf (fout, "/Times-Roman findfont 12 scalefont setfont\n");
+
+ /* get the X position for id codes then write them out */
+ if (sim == 1)
+ {
+ Xid = Xmax + (Xrange * IdOffsetFactor);
+ }
+ else
+ {
+ Xid = Xmin - (Xrange * IdOffsetFactor);
+ }
+ Xid = trans_x (Xid, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+
+ for (i = 0; i < (n + 1); ++i)
+ {
+ Yid = trans_y ((double) i, Ymin, Ymax, Yrange, Theight, Yoffset);
+ PSPutText ((float) Xid, (float) Yid, idents[clust[n - 1].members[i]],
+ fout);
+ }
+
+ /* write min and max scores below the plot */
+ x1 = Xmin;
+ x2 = Xmax;
+ y1 = Ymin - (Yrange * NumberOffsetFactor);
+ x1 = trans_x (x1, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+ x2 = trans_x (x2, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+ y1 = trans_y (y1, Ymin, Ymax, Yrange, Theight, Yoffset);
+ sprintf (buff, "%g", Xmin);
+ PSPutText ((float) x1, (float) y1, buff, fout);
+ sprintf (buff, "%g", Xmax);
+ PSPutText ((float) x2, (float) y1, buff, fout);
+
+ for (i = 0; i < n; ++i)
+ {
+ float bob = 1. / 0.;
+
+ x1 = clust[i].parentA->score;
+ x2 = clust[i].score;
+ x3 = clust[i].parentB->score;
+ if (x1 == base_val && sim == 1)
+ x1 = Xmax;
+ if (x1 == base_val && sim == 2)
+ x1 = Xmin;
+ if (x3 == base_val && sim == 1)
+ x3 = Xmax;
+ if (x3 == base_val && sim == 2)
+ x3 = Xmin;
+ y1 = get_mean (clust[i].parentA[0].members, inx, clust[i].parentA[0].n);
+ y2 = get_mean (clust[i].parentB[0].members, inx, clust[i].parentB[0].n);
+ /* transform x and y to fit on the page, then */
+ x1 = trans_x (x1, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+ x2 = trans_x (x2, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+
+ /* test for inf */
+ if (bob == x2)
+ {
+ fprintf (stderr, "%e, %e, %e, %e, %e, %e, %e\n", x1, (double)sim, Xmin,
+ Xmax, Xrange, Twidth, Xoffset);
+ exit (EXIT_FAILURE);
+ }
+
+ x3 = trans_x (x3, sim, Xmin, Xmax, Xrange, Twidth, Xoffset);
+ y1 = trans_y (y1, Ymin, Ymax, Yrange, Theight, Yoffset);
+ y2 = trans_y (y2, Ymin, Ymax, Yrange, Theight, Yoffset);
+
+ draw_arch (fout, (float) x1, (float) x2, (float) x3, (float) y1,
+ (float) y2);
+ }
+ fprintf (fout, "showpage\n");
+}
+
+double
+get_mean (int *arr, int *inx, int n)
+{
+ double sum;
+ int i;
+ sum = 0.0;
+ for (i = 0; i < n; ++i)
+ {
+ sum += (double) inx[arr[i]];
+ }
+ return sum / n;
+}
+
+void
+draw_arch (FILE * fout, float x1, float x2, float x3, float y1, float y2)
+{
+ fprintf (fout, "%f %f moveto\n", x1, y1);
+ fprintf (fout, "%f %f lineto\n", x2, y1);
+ fprintf (fout, "%f %f lineto\n", x2, y2);
+ fprintf (fout, "%f %f lineto\n", x3, y2);
+ fprintf (fout, "stroke newpath\n");
+}
+
+double
+trans_x (double X,
+ int sim,
+ double Xmin, double Xmax, double Xrange, float Twidth, float Xoffset)
+{
+ if (sim == 1)
+ {
+ return (((X - Xmin) / Xrange) * (double) Twidth) + Xoffset;
+ }
+ else
+ {
+ return (((Xmax - X) / Xrange) * (double) Twidth) + Xoffset;
+ }
+}
+
+double
+trans_y (double X,
+ double Xmin, double Xmax, double Xrange, float Twidth, float Xoffset)
+{
+ return (((X - Xmin) / Xrange) * (double) Twidth) + Xoffset;
+}
+
+void
+PSPutText (float x, float y, char *text, FILE * outf)
+{
+ fprintf (outf, " %f %f moveto (%s) show \n", x, y, text);
+}
+
+/**********************************************************************
+mark_parents: This works back up the tree setting the entity->lab
+values to 0 for all parents.
+Hopefully the recursion won't run out of space for the size of problem
+we typically deal with...
+----------------------------------------------------------------------*/
+
+void
+mark_parents (struct sc *entity)
+{
+ entity->lab = 0;
+ if (entity->parentA != NULL)
+ {
+ mark_parents (entity->parentA);
+ }
+ if (entity->parentB != NULL)
+ {
+ mark_parents (entity->parentB);
+ }
+}
+
+/*********************************************************************
+write_unclustered:
+
+Work back down the tree and print out the identifiers of entitites that
+are not clustered into any group. If mark_parents has not been called,
+then all entities will be output. Otherwise, only those that have
+not been marked will be output.
+--------------------------------------------------------------------*/
+
+void
+write_unclustered (struct sc *entity, char **idents, FILE * fout)
+{
+ if (entity->lab == 1)
+ {
+ /* this could be an unclustered entity or a cluster
+ containing unclustered entities */
+ if (entity->parentA == NULL && entity->parentB == NULL)
+ {
+ fprintf (fout, "%s\n", idents[entity->members[0]]);
+ return;
+ }
+ /* we don't need both if's since any entitity with one NULL parent
+ must also have the other parent NULL
+ */
+ if (entity->parentA != NULL)
+ {
+ write_unclustered (entity->parentA, idents, fout);
+ }
+ if (entity->parentB != NULL)
+ {
+ write_unclustered (entity->parentB, idents, fout);
+ }
+ }
+}
+
+/*********************************************************************
+ * OC FILE READING FUNCTIONS
+--------------------------------------------------------------------*/
+
+
+/*********************************************************************
+sys_fatal_error:
+
+Pass a string to the function, prints it and the system error, then exits with
+failure.
+--------------------------------------------------------------------*/
+
+void
+sys_fatal_error (const char *error)
+{
+ perror (error);
+ exit (EXIT_FAILURE);
+}
+
+/*********************************************************************
+getline:
+
+Returns a null terminated string representing a line, from a file pointer. Or,
+if the file is empty, NULL.
+--------------------------------------------------------------------*/
+
+#define BUFFER 80 /* Size of buffer for reading in a line */
+
+char *
+getline_loc (FILE * file)
+{
+ int c, i, j;
+ char *line;
+ i = j = 0;
+
+ if ((line = malloc (sizeof (char) * BUFFER)) == NULL)
+ sys_fatal_error ("malloc() failed\n");
+
+ while ((c = getc (file)) != EOF)
+ {
+
+ if (i + 1 % BUFFER == 0)
+ {
+ if ((line = realloc (line, sizeof (char) * BUFFER * ++j)) == NULL)
+ sys_fatal_error ("realloc() failed\n");
+ }
+
+ if (c == (int) '\n')
+ break;
+
+ line[i++] = (char) c;
+ }
+
+ if (i == 0)
+ return NULL;
+
+ if ((line = realloc (line, sizeof (char) * i)) == NULL)
+ sys_fatal_error ("realloc() failed\n");
+
+ line[i] = '\0';
+
+ return line;
+}
+
+/**********************************************************************
+read_num:
+Reads in the number of sequences that are found in the OC file.
+
+Returns an int
+
+-----------------------------------------------------------------------*/
+int
+read_num (FILE * infile)
+{
+ int n;
+ char *line;
+
+
+ /* regex_t cmpregex;
+ const char pattern[] = "^ *[0-9]\\+ *$";
+ */
+
+ if ((line = getline_loc (infile)) == NULL)
+ {
+ fprintf (stderr, "File empty.\n");
+ exit (EXIT_FAILURE);
+ }
+
+ /* comment this stuff out as it fails on SunOS
+
+ if (regcomp(&cmpregex, pattern, REG_NOSUB) != 0) {
+ fprintf(stderr, "Regex compilation failed! Check source code for error.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (regexec(&cmpregex, line, 0, NULL, 0) == REG_NOMATCH) {
+ fprintf(stderr, "File doesn't contain integer number of entries on first line. Line is:\n%s\n", line);
+ exit(EXIT_FAILURE);
+ }
+ */
+
+ n = atoi (line);
+
+ /* regfree(&cmpregex); */
+ free (line);
+ return n;
+}
+
+/**********************************************************************
+read_idents:
+reads n identifiers from infile
+
+returns a pointer to an array of identifiers
+-----------------------------------------------------------------------*/
+
+char **
+read_idents (FILE * infile, int n)
+{
+ char **ret_val;
+ int i;
+ int error;
+ char *buff;
+
+ buff = GJstrcreate (MAX_ID_LEN, NULL);
+ ret_val = (char **) GJmalloc (sizeof (char *) * n);
+
+ for (i = 0; i < n; ++i)
+ {
+ error = fscanf (infile, "%s", buff);
+ if (error == EOF)
+ {
+ fprintf (stderr, "Premature end of file reached in identifiers\n");
+ exit (EXIT_FAILURE);
+ }
+ ret_val[i] = GJstrdup (buff);
+ }
+ return ret_val;
+}
+
+/**********************************************************************
+read_up_diag:
+reads a file containing n, the number of entities, followed by
+n*(n-1)/2 numbers that are the pair distances or similarities between
+the n entities.
+
+Returns an upper diagonal type pointer to pointer array
+
+-----------------------------------------------------------------------*/
+double **
+read_up_diag (FILE * infile, int n)
+{
+ double **ret_val;
+ int error;
+ int i, j;
+
+ ret_val = (double **) GJDudarr (n);
+ for (i = 0; i < (n - 1); ++i)
+ {
+ for (j = 0; j < (n - i - 1); ++j)
+ {
+ error = fscanf (infile, "%lf", &ret_val[i][j]);
+ if (error == EOF)
+ {
+ fprintf (stderr,
+ "Premature end of file reached in upper diagonal\n");
+ exit (EXIT_FAILURE);
+ }
+ }
+ }
+ return ret_val;
+}