/********* Sequence input routines for CLUSTAL W *******************/ /* DES was here. FEB. 1994 */ /* Now reads PILEUP/MSF and CLUSTAL alignment files */ #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" /* * Prototypes */ extern Boolean linetype(char *,char *); extern Boolean blankline(char *); extern void warning(char *,...); extern void error(char *,...); extern char * rtrim(char *); extern char * blank_to_(char *); extern void getstr(char *,char *); void fill_chartab(void); static void get_seq(char *,char *,int *,char *); static void get_clustal_seq(char *,char *,int *,char *,int); static void get_msf_seq(char *,char *,int *,char *,int); static void check_infile(int *); static int count_clustal_seqs(void); static int count_msf_seqs(void); /* * Global variables */ static FILE *fin; char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */ char *nucleic_acid_order = "ACGTUN"; static int seqFormat; static char chartab[128]; void fill_chartab(void) /* Create translation and check table */ { register int i; register int c; for(i=0;i<128;chartab[i++]=0); for(i=0,c=0;c<=amino_acid_codes[i];i++) chartab[c]=chartab[tolower(c)]=c; } static void get_msf_seq(char *sname,char *seq,int *len,char *tit,int seqno) /* read the seqno_th. sequence from a PILEUP multiple alignment file */ { static char *line; int i,j,k; unsigned char c; if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); fseek(fin,0,0); /* start at the beginning */ *len=0; /* initialise length to zero */ for(i=0;;i++) { if(fgets(line,MAXLINE+1,fin)==NULL) return; /* read the title*/ if(linetype(line,"/") ) break; /* lines...ignore*/ } while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) { for(i=1;i') break; /* EOL */ if( (c=chartab[c])) {seq[++(*len)]=c; } } if(*len == SEQ_MAX_LEN || c == '>') break; } break; /**********************************************/ case GDE: while(*line != '#' ||*line != '%' ) fgets(line,MAXLINE+1,fin); for (i=1;i<=MAXNAMES;i++) { if (line[i] == '(' || line[i] == '\n') { i--; break; } sname[i-1] = line[i]; } sname[i]=EOS; offset=0; if (sname[i-1] == '(') sscanf(&line[i],"%d",&offset); else offset = 0; for(i=MAXNAMES-1;i > 0;i--) if(isspace(sname[i])) { sname[i]=EOS; break; } blank_to_(sname); *tit=EOS; *len=0; for (i=0;i SEQ_MAX_LEN) { error("Sequence too long. Maximum is %d",(pint)SEQ_MAX_LEN); return 0; /* also return zero if too many */ } for ( a=0; a') { /* no */ seqFormat=(line[3] == ';')?PIR:PEARSON; /* distinguish PIR and Pearson */ (*nseqs)++; } else if((*line == '"') || (*line == '%') || (*line == '#')) { seqFormat=GDE; /* GDE format */ if (*line == '%') { (*nseqs)++; } else if (*line == '#') { (*nseqs)++; } } else { seqFormat=UNKNOWN; return; } while(fgets(line,MAXLINE+1,fin) != NULL) { switch(seqFormat) { case EMBLSWISS: if( linetype(line,"ID") ) (*nseqs)++; break; case PIR: case PEARSON: if( *line == '>' ) (*nseqs)++; break; case GDE: if(( *line == '%' ) ) (*nseqs)++; else if (( *line == '#') ) (*nseqs)++; break; case CLUSTAL: *nseqs = count_clustal_seqs(); /* DES */ /* fprintf(stdout,"\nnseqs = %d\n",(pint)*nseqs); */ fseek(fin,0,0); return; break; case MSF: *nseqs = count_msf_seqs(); fseek(fin,0,0); return; break; case USER: default: break; } } fseek(fin,0,0); } static int count_clustal_seqs(void) /* count the number of sequences in a clustal alignment file */ { static char *line; int nseqs; if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(blankline(line)) return nseqs; nseqs++; } return 0; /* if you got to here-funny format/no seqs.*/ } static int count_msf_seqs(void) { /* count the number of sequences in a PILEUP alignment file */ static char *line; int nseqs; if ( !line)line=vcalloc ( (MAXLINE+1), sizeof (char)); while (fgets(line,MAXLINE+1,fin) != NULL) { if(linetype(line,"/")) break; } while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(blankline(line)) return nseqs; nseqs++; } return 0; /* if you got to here-funny format/no seqs.*/ } /******************************COPYRIGHT NOTICE*******************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ /* T-COFFEE is free software; you can redistribute it and/or modify*/ /* it under the terms of the GNU General Public License as published by*/ /* the Free Software Foundation; either version 2 of the License, or*/ /* (at your option) any later version.*/ /**/ /* T-COFFEE is distributed in the hope that it will be useful,*/ /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ /* GNU General Public License for more details.*/ /**/ /* You should have received a copy of the GNU General Public License*/ /* along with Foobar; if not, write to the Free Software*/ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ /*............................................... |*/ /* If you need some more information*/ /* cedric.notredame@europe.com*/ /*............................................... |*/ /**/ /**/ /* */ /******************************COPYRIGHT NOTICE*******************************/