/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ /********************************************************************* * Clustal Omega - Multiple sequence alignment * * Copyright (C) 2010 University College Dublin * * Clustal-Omega is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This file is part of Clustal-Omega. * ********************************************************************/ /* * RCS $Id: hhutil-C.h 246 2011-06-15 12:41:04Z fabian $ */ ////////////////////////////////////////////////////////////////////////////// // Transform a character to lower case and '.' to '-' and vice versa ////////////////////////////////////////////////////////////////////////////// inline char MatchChr(char c) {return ((c>='a' && c<='z')? c-'a'+'A' : (c=='.'? '-':c) );} inline char InsertChr(char c) {return ((c>='A' && c<='Z')? c+'a'-'A' : ((c>='0' && c<='9') || c=='-')? '.':c );} inline int WordChr(char c) {return (int)((c>='A' && c<='Z') || (c>='a' && c<='z'));} ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms the one-letter amino acid code into an integer between 0 and 22 */ inline char aa2i(char c) { //A R N D C Q E G H I L K M F P S T W Y V if (c>='a' && c<='z') c+='A'-'a'; switch (c) { case 'A': return 0; case 'R': return 1; case 'N': return 2; case 'D': return 3; case 'C': return 4; case 'Q': return 5; case 'E': return 6; case 'G': return 7; case 'H': return 8; case 'I': return 9; case 'L': return 10; case 'K': return 11; case 'M': return 12; case 'F': return 13; case 'P': return 14; case 'S': return 15; case 'T': return 16; case 'W': return 17; case 'Y': return 18; case 'V': return 19; case 'X': return ANY; case 'J': return ANY; case 'O': return ANY; case 'U': return 4; //Selenocystein -> Cystein case 'B': return 3; //D (or N) case 'Z': return 6; //E (or Q) case '-': return GAP; case '.': return GAP; case '_': return GAP; } if (c>=0 && c<=32) return -1; // white space and control characters return -2; } /////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms integers between 0 and 22 into the one-letter amino acid code */ inline char i2aa(char c) { //A R N D C Q E G H I L K M F P S T W Y V switch (c) { case 0: return 'A'; case 1: return 'R'; case 2: return 'N'; case 3: return 'D'; case 4: return 'C'; case 5: return 'Q'; case 6: return 'E'; case 7: return 'G'; case 8: return 'H'; case 9: return 'I'; case 10: return 'L'; case 11: return 'K'; case 12: return 'M'; case 13: return 'F'; case 14: return 'P'; case 15: return 'S'; case 16: return 'T'; case 17: return 'W'; case 18: return 'Y'; case 19: return 'V'; case ANY: return 'X'; case GAP: return '-'; case ENDGAP: return '-'; } return '?'; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms the dssp/psipred secondary structure code into an integer number */ inline char ss2i(char c) { //- H E C S T G B if (c>='a' && c<='z') c+='A'-'a'; switch (c) { case '.': return 0; case '-': return 0; case 'X': return 0; case 'H': return 1; case 'E': return 2; case 'C': return 3; case '~': return 3; case 'S': return 4; case 'T': return 5; case 'G': return 6; case 'B': return 7; case 'I': return 3; case ' ': return -1; case '\t': return -1; case '\n': return -1; } return -2; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms integers between 0 and 8 into the dssp/psipred secondary structure code */ inline char i2ss(int c) { //- H E C S T G B switch (c) { case 0: return '-'; case 1: return 'H'; case 2: return 'E'; case 3: return 'C'; case 4: return 'S'; case 5: return 'T'; case 6: return 'G'; case 7: return 'B'; case 8: return 'I'; } return '?'; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms the solvend accessiblity code into an integer number */ inline char sa2i(char c) { //- A B C D E if (c>='a' && c<='z') c+='A'-'a'; switch (c) { case '.': return 0; case '-': return 0; case 'A': return 1; case 'B': return 2; case 'C': return 3; case 'D': return 4; case 'E': return 5; case 'F': return 6; case ' ': return -1; case '\t': return -1; case '\n': return -1; } return -2; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms integers between 0 and 5 into the solvent accessibility code */ inline char i2sa(int c) { //- H E C S T G B switch (c) { case 0: return '-'; case 1: return 'A'; case 2: return 'B'; case 3: return 'C'; case 4: return 'D'; case 5: return 'E'; case 6: return 'F'; } return '?'; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms alternative secondary structure symbols into symbols */ inline char ss2ss(char c) { //- H E C S T G B switch (c) { case '~': return 'C'; case 'I': return 'C'; case 'i': return 'c'; case 'H': case 'E': case 'C': case 'S': case 'T': case 'G': case 'B': case 'h': case 'e': case 'c': case 's': case 't': case 'g': case 'b': case '.': return c; } return '-'; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms confidence values of psipred into internal code */ inline char cf2i(char c) { switch (c) { case '-': return 0; case '.': return 0; case '0': return 1; case '1': return 2; case '2': return 3; case '3': return 4; case '4': return 5; case '5': return 6; case '6': return 7; case '7': return 8; case '8': return 9; case '9': return 10; } return 0; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Transforms internal representation of psipred confidence values into printable chars */ inline char i2cf(char c) { switch (c) { case 0: return '-'; case 1: return '0'; case 2: return '1'; case 3: return '2'; case 4: return '3'; case 5: return '4'; case 6: return '5'; case 7: return '6'; case 8: return '7'; case 9: return '8'; case 10: return '9'; } return '-'; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Fast lookup of log2(1+2^(-x)) for x>=0 (precision < 0.35%) */ inline float fast_addscore(float x) { static float val[2001]; // val[i]=log2(1+2^(-x)) static char initialized; if (x>20) return 0.0; if (x<0) { fprintf(stderr,"Error in function fast_addscore: argument %g is negative\n",x); exit(7); } if (!initialized) //First fill in the log2-vector { for (int i=0; i<=2000; i++) val[i]=log2(1.0+pow(2,-0.01*(i+0.5))); initialized=1; } return val[(int)(100.0*x)]; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Little utilities for output */ inline void fout(FILE* outf, int d) { if (d>=99999) fprintf(outf,"*\t"); else fprintf(outf,"%i\t",d); return; } ////////////////////////////////////////////////////////////////////////////// /** * @brief Errors */ int FormatError(const char infile[], const char details[]="") { cerr<<"Error in "<0 && outf.getline(line,LINELEN); n--) cout<=3) cerr<<"Warning: could not find ./.hhdefaults or "< need no warning // Scan file until line 'program_nameANYTHING' while (fgets(line,LINELEN,configf)) if (!strncmp(line,program_name,6)) break; // Found line 'program_nameANYTHING'? if (!strncmp(line,program_name,6)) { // Read in options until end-of-file or empty line while (fgets(line,LINELEN,configf) && strcmp(line,"\n")) { // Analyze line c=line; do { // Find next word while (*c==' ' || *c=='\t') c++; //Advance until next non-white space if (*c=='\0' || *c=='\n' || *c=='#') break; //Is next word empty string? c_first=c; while (*c!=' ' && *c!='\t' && *c!='#' && *c!='\0' && *c!='\n' ) c++; //Advance until next white space or '#' if (*c=='\0' || *c=='\n' || *c=='#') //Is end of line reached? { *c='\0'; argv_conf[argc_conf]=new(char[strlen(c_first)+1]); strcpy(argv_conf[argc_conf++],c_first); break; } *c='\0'; argv_conf[argc_conf]=new(char[strlen(c_first)+1]); strcpy(argv_conf[argc_conf++],c_first); printf("Argument: %s\n",c_first); c++; } while (1); } //end read line if (v>=3) { cout<<"Arguments read in from .hhdefaults:"; for (int argc=1; argc=3) cout<<"Read in "<=3) cerr<0 weighs columns according to their intra-clomun similarity par.gapb=1.405220; // default values for transition pseudocounts par.gapd=1.316760; // gap open penalty pseudocount; 0.25 corresponds to 7.1*gapf bits par.gape=1.793780; // gap extension penalty pseudocount par.gapf=1.034710; // factor for increasing gap open penalty for deletes par.gapg=0.894772; // factor for increasing gap open penalty for inserts par.gaph=0.544072; // factor for increasing gap extension penalty for deletes par.gapi=0.862559; // factor for increasing gap extension penalty for inserts #else /* Soeding's default*/ par.pca=1.0f; // default values for substitution matrix pseudocounts par.pcb=1.5f; // significant reduction of pcs by Neff_M starts around Neff_M-1=pcb par.pcc=1.0f; // pcs are reduced prop. to 1/Neff^pcc par.pcw=0.0f; // wc>0 weighs columns according to their intra-clomun similarity par.gapb=1.0; // default values for transition pseudocounts par.gapd=0.15; // gap open penalty pseudocount; 0.25 corresponds to 7.1*gapf bits par.gape=1.0; // gap extension penalty pseudocount par.gapf=0.6; // factor for increasing gap open penalty for deletes par.gapg=0.6; // factor for increasing gap open penalty for inserts par.gaph=0.6; // factor for increasing gap extension penalty for deletes par.gapi=0.6; // factor for increasing gap extension penalty for inserts #endif #if 0 /* Viterbi parameters optimised on Sabre (R228), FS, r228 -> r229 */ par.pcaV=1.245150f; // default values for substitution matrix pseudocounts par.pcbV=1.682110f; // significant reduction of pcs by Neff_M starts around Neff_M-1=pcb par.pccV=1.483840f; // pcs are reduced prop. to 1/Neff^pcc par.pcwV=0.0f; // wc>0 weighs columns according to their intra-clomun similarity par.gapbV=0.818625; // default values for transition pseudocounts par.gapdV=0.666110; // gap open penalty pseudocount; 0.25 corresponds to 7.1*gapf bits par.gapeV=1.028050; // gap extension penalty pseudocount par.gapfV=0.710760; // factor for increasing gap open penalty for deletes par.gapgV=1.649800; // factor for increasing gap open penalty for inserts par.gaphV=0.470604; // factor for increasing gap extension penalty for deletes par.gapiV=0.829479; // factor for increasing gap extension penalty for inserts #elif 1 /* Viterbi parameters optimised on Balibase, r244 -> r245 */ par.pcaV=1.333860f; // default values for substitution matrix pseudocounts par.pcbV=1.934480f; // significant reduction of pcs by Neff_M starts around Neff_M-1=pcb par.pccV=1.655610f; // pcs are reduced prop. to 1/Neff^pcc par.pcwV=0.0f; // wc>0 weighs columns according to their intra-clomun similarity par.gapbV=0.334525; // default values for transition pseudocounts par.gapdV=0.074534; // gap open penalty pseudocount; 0.25 corresponds to 7.1*gapf bits par.gapeV=0.320336; // gap extension penalty pseudocount par.gapfV=0.151634; // factor for increasing gap open penalty for deletes par.gapgV=0.641516; // factor for increasing gap open penalty for inserts par.gaphV=0.266434; // factor for increasing gap extension penalty for deletes par.gapiV=0.598414; // factor for increasing gap extension penalty for inserts #else /* Soeding default*/ par.pcaV=1.0f; // default values for substitution matrix pseudocounts par.pcbV=1.5f; // significant reduction of pcs by Neff_M starts around Neff_M-1=pcb par.pccV=1.0f; // pcs are reduced prop. to 1/Neff^pcc par.pcwV=0.0f; // wc>0 weighs columns according to their intra-clomun similarity par.gapbV=1.0; // default values for transition pseudocounts par.gapdV=0.15; // gap open penalty pseudocount; 0.25 corresponds to 7.1*gapf bits par.gapeV=1.0; // gap extension penalty pseudocount par.gapfV=0.6; // factor for increasing gap open penalty for deletes par.gapgV=0.6; // factor for increasing gap open penalty for inserts par.gaphV=0.6; // factor for increasing gap extension penalty for deletes par.gapiV=0.6; // factor for increasing gap extension penalty for inserts #endif par.ssm=2; // ss scoring mode: 0:no ss score 1:score after alignment 2:score during alignment par.ssw=0.11f; // weight of ss scoring par.ssa=1.0f; // weight of ss evolution matrix par.shift=-0.01f; // Shift match score up par.mact=0.3001f; // Score threshold for MAC alignment in local mode (set to 0.5001 to track user modification) par.corr=0.1f; // Weight of correlations of scores for |i-j|<=4 par.wstruc=1.0f; // Weight of structure scores par.egq=0.0f; // no charge for end gaps as default par.egt=0.0f; // no charge for end gaps as default par.trans=0; // no transitive scoring as default par.Emax_trans=100.0f; // use intermediate HMMs with E-values up to 100 between query and database HMM par.Emax_trans=100.0f; // use intermediate HMMs with E-values up to 100 between query and database HMM par.wtrans=1.0f; // Ztot[k] = Zq[k] + wtrans * (Zforward[k]+Zreverse[k]) par.ssgap=0; // 1: add secondary structure-dependent gap penalties 0:off par.ssgapd=1.0f; // secondary structure-dependent gap-opening penalty (per residue) par.ssgape=0.0f; // secondary structure-dependent gap-extension penalty (per residue) par.ssgapi=4; // max. number of inside-integer(ii); gap-open-penalty= -ii*ssgapd par.loc=1; // local vs. global alignment as default par.altali=2; // find up to two (possibly overlapping) subalignments par.forward=0; // 0: Viterbi algorithm; 1: Viterbi+stochastic sampling; 3:Maximum Accuracy (MAC) algorithm par.realign=1; // realign with MAC algorithm par.repmode=0; // repeats score independently of one another par.columnscore=1; // Default column score is 1: null model pnul = 1/2 * (q_av(a)+p_av(a)) par.min_overlap=0; // automatic minimum overlap used par.opt=0; // Default = optimization mode off par.readdefaultsfile=0; // Default = do not read a defaults file ./.hhdefaults or HOME/.hhdefaults par.maxdbstrlen=200; // maximum length of database string to be printed in 'Command' line of hhr file par.mode=0; par.idummy=par.jdummy=0; // par.notags=1; // neutralize His-tags, FLAG-tags, C-myc-tags // Initialize strings strcpy(par.infile,"stdin"); strcpy(par.outfile,""); strcpy(par. pairwisealisfile,""); strcpy(par.buffer,"buffer.txt"); strcpy(par.scorefile,""); strcpy(par.wfile,""); strcpy(par.alnfile,""); strcpy(par.hhmfile,""); strcpy(par.psifile,""); par.exclstr=NULL; #if 0 /* read parameter file from home-dir */ #include "hhutil-C-help.h" #endif /* read parameter file from home-dir */ return; } /** this is the end of SetDefaults() **/ /* * EOF hhutil-C.h */