* SQUID's interface for multiple sequence alignment
* manipulation: access to the MSA object.
*
- * RCS $Id: msa.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: msa.c,v 1.18 2002/10/12 04:40:35 eddy Exp)
+ * RCS $Id: msa.c 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: msa.c,v 1.18 2002/10/12 04:40:35 eddy Exp)
*/
#include <stdio.h>
msa->sslen = NULL;
msa->sa = NULL;
msa->salen = NULL;
+ msa->co = NULL;
+ msa->colen = NULL;
msa->index = GKIInit();
msa->lastidx = 0;
Free2DArray((void **) msa->sqdesc, msa->nseq);
Free2DArray((void **) msa->ss, msa->nseq);
Free2DArray((void **) msa->sa, msa->nseq);
+ Free2DArray((void **) msa->co, msa->nseq);
if (msa->sqlen != NULL) free(msa->sqlen);
if (msa->wgt != NULL) free(msa->wgt);
if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s",
msa->name != NULL ? msa->name : "");
- msa->alen = msa->sqlen[0];
+ msa->alen = msa->sqlen[0];
/* We can rely on msa->sqname[] being valid for any index,
* because of the way the line parsers always store any name
msa->sqname[idx],
msa->name != NULL ? msa->name : "");
/* all aseq must be same length. */
- if (msa->sqlen[idx] != msa->alen)
+ if (msa->sqlen[idx] != msa->alen)
Die("Parse error: sequence %s: length %d, expected %d in alignment %s",
msa->sqname[idx], msa->sqlen[idx], msa->alen,
msa->name != NULL ? msa->name : "");
}
/* if cons SS is present, must have length right */
- if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen)
+ if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen) /* FIXME */
Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s",
strlen(msa->ss_cons), msa->alen,
msa->name != NULL ? msa->name : "");
/* if cons SA is present, must have length right */
- if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen)
+ if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen) /* FIXME */
Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s",
strlen(msa->sa_cons), msa->alen,
msa->name != NULL ? msa->name : "");
}
+/* @* MSAVerifyParseDub */
+void
+MSAVerifyParseDub(MSA *msa)
+{
+ int idx;
+
+ if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s",
+ msa->name != NULL ? msa->name : "");
+
+ msa->alen = msa->sqlen[0];
+
+ /* We can rely on msa->sqname[] being valid for any index,
+ * because of the way the line parsers always store any name
+ * they add to the index.
+ */
+ for (idx = 0; idx < msa->nseq; idx++)
+ {
+ /* aseq is required. */
+ if (msa->aseq[idx] == NULL)
+ Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx],
+ msa->name != NULL ? msa->name : "");
+ /* either all weights must be set, or none of them */
+ if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0)
+ Die("Parse error: some weights are set, but %s doesn't have one in alignment %s",
+ msa->sqname[idx],
+ msa->name != NULL ? msa->name : "");
+ /* this is Dublin format, aseq don't have to be same length. */
+ if (msa->sqlen[idx] > msa->alen) {
+ msa->alen = msa->sqlen[idx];
+ }
+ /* if SS is present, must have length right */
+ if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->sqlen[idx])
+ Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s",
+ msa->sqname[idx], msa->sslen[idx], msa->sqlen[idx],
+ msa->name != NULL ? msa->name : "");
+ /* if SA is present, must have length right */
+ if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->sqlen[idx])
+ Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s",
+ msa->sqname[idx], msa->salen[idx], msa->sqlen[idx],
+ msa->name != NULL ? msa->name : "");
+
+ /* if CO is present, must have length right */
+ if (msa->co != NULL && msa->co[idx] != NULL && msa->colen[idx] != msa->sqlen[idx])
+ Die("Parse error: #=GR CO annotation for %s: length %d, expected %d in alignment %s",
+ msa->sqname[idx], msa->colen[idx], msa->sqlen[idx],
+ msa->name != NULL ? msa->name : "");
+ }
+
+
+
+
+ /* Check that all or no weights are set */
+ if (!(msa->flags & MSA_SET_WGT))
+ FSet(msa->wgt, msa->nseq, 1.0); /* default weights */
+
+ /* Clean up a little from the parser */
+ if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; }
+ if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; }
+ if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; }
+ if (msa->colen != NULL) { free(msa->colen); msa->colen = NULL; }
+
+ return;
+} /* this is the end of MSAVerifyParseDub() */
+
/* Function: MSAFileOpen()
case MSAFILE_CLUSTAL: msa = ReadClustal(afp); break;
case MSAFILE_SELEX: msa = ReadSELEX(afp); break;
case MSAFILE_PHYLIP: msa = ReadPhylip(afp); break;
+ case MSAFILE_DUBLIN: msa = ReadDublin(afp); break; /* -> r296, FS */
default:
Die("MSAFILE corrupted: bad format index");
}
}
void
-MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline)
+MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline, int iWrap, int bResno, int iSeqType)
{
switch (outfmt) {
#ifdef CLUSTALO
- case MSAFILE_A2M: WriteA2M(stdout, msa, 0); break;
- case MSAFILE_VIENNA: WriteA2M(stdout, msa, 1); break;
+ /*case MSAFILE_A2M: WriteA2M(stdout, msa, 0); break;*/
+ /*case MSAFILE_VIENNA: WriteA2M(stdout, msa, 1); break;*/
+ case MSAFILE_A2M: WriteA2M(stdout, msa, iWrap); break;
+#ifndef INT_MAX
+#define INT_MAX 2147483647
+#endif
+ case MSAFILE_VIENNA: WriteA2M(stdout, msa, INT_MAX); break;
+ case MSAFILE_CLUSTAL: WriteClustal(stdout, msa, iWrap, bResno, iSeqType); break;
#else
case MSAFILE_A2M: WriteA2M(stdout, msa); break;
-#endif
case MSAFILE_CLUSTAL: WriteClustal(stdout, msa); break;
+#endif
case MSAFILE_MSF: WriteMSF(stdout, msa); break;
case MSAFILE_PHYLIP: WritePhylip(stdout, msa); break;
case MSAFILE_SELEX: WriteSELEX(stdout, msa); break;