1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
12 * SRE, Thu Mar 3 13:43:39 1994
14 * Calculate weights for a sequence alignment.
15 * CVS $Id: weight_main.c,v 1.1.1.1 2005/03/22 08:34:30 cmzmasek Exp $
26 static char banner[] = "weight - calculate sequence weights for an alignment";
28 static char usage[] = "\
29 Usage: weight [-options] <alignment file>\n\
31 -b <f> : use BLOSUM weighting scheme at <f> fractional identity\n\
32 -f <f> : filter out seqs w/ fractional ident > <x> [0-1]\n\
33 -h : help; print version and usage info\n\
34 -o <file> : save weight-annotated alignment in <outfile>\n\
35 -p : use position based weight scheme (Henikoff & Henikoff)\n\
36 -s <n> : sample <n> sequences at random into a new alignment\n\
37 -v : use Voronoi weight scheme (Sibbald & Argos) \n\
40 static char experts[] = "\
42 --informat <s> : specify alignment file format <s>\n\
43 allowed formats: SELEX, MSF, Clustal, a2m, PHYLIP\n\
44 --quiet : suppress verbose banner\n\
47 static struct opt_s OPTIONS[] = {
48 { "-b", TRUE, sqdARG_FLOAT },
49 { "-f", TRUE, sqdARG_FLOAT },
50 { "-h", TRUE, sqdARG_NONE },
51 { "-o", TRUE, sqdARG_STRING },
52 { "-p", TRUE, sqdARG_NONE },
53 { "-s", TRUE, sqdARG_INT },
54 { "-v", TRUE, sqdARG_NONE },
55 { "--informat", FALSE, sqdARG_STRING },
56 { "--quiet", FALSE, sqdARG_NONE },
58 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
61 main(int argc, char **argv)
63 char *seqfile; /* file containing aligned seqs */
64 MSAFILE *afp; /* pointer to open alignment file */
65 MSA *msa; /* multiple sequence alignment */
66 int fmt; /* expected format of alignment file */
68 char *outfile; /* output file for weighted alignment */
69 FILE *ofp; /* open outfile */
71 int do_voronoi; /* use Sibbald/Argos Voronoi scheme */
72 int do_blosum; /* use BLOSUM weighting scheme */
73 int do_pbased; /* use position-based weights */
74 int do_filter; /* use filtering scheme */
75 float idlevel; /* identity level to filter at, [0-1] */
76 int samplesize; /* if >0, don't weight, random sample */
77 int be_quiet; /* TRUE to suppress banner */
79 char *optname; /* name of option found by Getopt() */
80 char *optarg; /* argument found by Getopt() */
81 int optind; /* index in argv[] */
83 /***********************************************
85 ***********************************************/
87 fmt = MSAFILE_UNKNOWN; /* autodetect file format by default */
95 idlevel = 0.; /* just to suppress gcc uninit warnings */
97 while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
98 &optind, &optname, &optarg))
100 if (strcmp(optname, "-b") == 0)
101 { do_blosum = TRUE; idlevel = atof(optarg); }
102 else if (strcmp(optname, "-f") == 0)
103 { do_filter = TRUE; idlevel = atof(optarg); }
104 else if (strcmp(optname, "-o") == 0) outfile = optarg;
105 else if (strcmp(optname, "-p") == 0) do_pbased = TRUE;
106 else if (strcmp(optname, "-s") == 0) samplesize = atoi(optarg);
107 else if (strcmp(optname, "-v") == 0) do_voronoi = TRUE;
108 else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE;
109 else if (strcmp(optname, "--informat") == 0) {
110 fmt = String2SeqfileFormat(optarg);
111 if (fmt == MSAFILE_UNKNOWN)
112 Die("unrecognized sequence file format \"%s\"", optarg);
113 if (! IsAlignmentFormat(fmt))
114 Die("%s is an unaligned format, can't read as an alignment", optarg);
116 else if (strcmp(optname, "-h") == 0)
118 Banner(stdout, banner);
125 if (argc -optind != 1)
126 Die("Wrong number of arguments specified on command line\n%s\n", usage);
127 seqfile = argv[optind];
131 else if ((ofp = fopen(outfile, "w")) == NULL)
132 Die("Failed to open alignment output file %s", outfile);
134 if (do_voronoi + do_pbased + do_blosum + do_filter + samplesize > 1)
135 Die("Choose only one weighting scheme, please.\n%s\n", usage);
137 if (do_voronoi || samplesize > 0)
138 sre_srandom(time(0));
141 Banner(stdout, banner);
143 /***********************************************
144 * Open the input alignment file and start...
145 * be prepared to deal with multiple entries in Stockholm files
146 ***********************************************/
148 if ((afp = MSAFileOpen(seqfile, fmt, NULL)) == NULL)
149 Die("Alignment file %s could not be opened for reading", seqfile);
151 while ((msa = MSAFileRead(afp)) != NULL)
153 for (idx = 0; idx < msa->nseq; idx++)
154 s2upper(msa->aseq[idx]);
156 if (do_filter || samplesize > 0)
161 FilterAlignment(msa, idlevel, &new);
162 else if (samplesize > 0)
163 SampleAlignment(msa, samplesize, &new);
166 WriteStockholm(ofp, new);
173 if (do_voronoi) VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
174 else if (do_blosum) BlosumWeights(msa->aseq, msa->nseq, msa->alen, idlevel, msa->wgt);
175 else if (do_pbased) PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
176 else GSCWeights (msa->aseq, msa->nseq, msa->alen, msa->wgt);
178 msa->flags |= MSA_SET_WGT;
179 WriteStockholm(ofp, msa);