1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
6 * Copyright (C) 2010 University College Dublin
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This file is part of Clustal-Omega.
15 ********************************************************************/
18 * RCS $Id: hhhitlist.h 243 2011-05-31 13:49:19Z fabian $
23 //////////////////////////////////////////////////////////////////////////////
24 // HitList is a list of hits of type Hit
25 // which can be operated upon by several anaylsis methods
26 //////////////////////////////////////////////////////////////////////////////
27 class HitList : public List<Hit>
30 double score[MAXPROF]; // HHsearch score of each HMM for ML fit
31 double weight[MAXPROF]; // weight of each HMM = 1/(size_fam[tfam]*size_sfam[hit.sfam]) for ML fit
32 int Nprof; // Number of HMMs for ML fit
35 int fams; // number of families found found in hitlist
36 int sfams; // number of superfamilies found in hitlist
37 int N_searched; // number of sequences searched from HMM database
38 Hash<float>* blast_logPvals;/* Hash containing names and log(P-values)
39 read from BLAST file (needed for HHblast) */
41 HitList() {blast_logPvals=NULL;}
42 ~HitList() {if (blast_logPvals) delete blast_logPvals;}
44 // Print summary listing of hits
45 void PrintHitList(HMM& q, char* outfile);
48 void ClobberGlobal(void);
51 // Print alignments of query sequences against hit sequences
54 char **ppcFirstProf, char **ppcSecndProf,
56 HMM& q, char* outfile, char outformat=0);
58 // Return a figure of merit for distinction of the score with positive from the scores with negatives
59 void Optimize(HMM& q, char* buffer);
61 // Print score distribution into file score_dist
62 void PrintScoreFile(HMM& q);
64 // Log likelihood for fitting the EVD to the score distribution
65 double RankOrderFitCorr(double* v);
66 // Static wrapper-function for calling the nonstatic member function RankOrderFitCorr()
67 static double RankOrderFitCorr_static(void* pt2hitlist, double* v);
69 // Log likelihood for fitting the EVD to the score distribution
70 double LogLikelihoodCorr(double* v);
71 // Static wrapper-function for calling the nonstatic member function LogLikelihoodCorr()
72 static double LogLikelihoodCorr_static(void* pt2hitlist, double* v);
74 // Log likelihood for fitting the EVD to the score distribution
75 double LogLikelihoodEVD(double* v);
76 // Static wrapper-function for calling the nonstatic member function LogLikelihoodEVD()
77 static double LogLikelihoodEVD_static(void* pt2hitlist, double* v);
80 // Subroutine to FindMin: new point given by highest point ihigh, fac and replaces ihigh if it is lower
81 double TryPoint(int ndim, double* p, double* y, double* psum, int ihigh, double fac, double (*Func)(void* pt2hitlist, double* v));
83 // Find minimum with simplex method of Nelder and Mead (1965)
84 float FindMin(int ndim, double* p, double* y, double tol, int& nfunc, double (*Func)(void* pt2hitlist, double* v));
86 // Do a maximum likelihood fit of the scores with an EV distribution with parameters lamda and mu
87 void MaxLikelihoodEVD(HMM& q, int nbest);
89 // Calculate correlation and score offset for HHblast composite E-values
90 void CalculateHHblastCorrelation(HMM& q);
92 // Calculate HHblast composite E-values
93 void CalculateHHblastEvalues(HMM& q);
95 // Calculate Pvalues as a function of query and template lengths and diversities
96 void CalculatePvalues(HMM& q);
98 // Set P-values, E-values and scores according to q.lamda and q.mu (if calibration from database scan is impossible)
99 void GetPvalsFromCalibration(HMM& q);
101 // HHblast: read PSI-BLAST E-values to determine correlation
102 void ReadBlastFile(HMM& q);
104 // Print first 20 hits of hitlist
110 printf("TARGET FAMILY LEN COL LOG-PVA S-AASS PROBAB SCORE_SORT\n");
111 while (++i<=20 && !End())
114 printf("%-10.10s %-10.10s %3i %3i %s %7.2f %6.2f %6.2f\n",hit.name,hit.fam,hit.L,hit.matched_cols,sprintg(-1.443*hit.logPval,7),-hit.score_aass,hit.Probab,hit.score_sort);
118 // Calculate P-values and Probabilities from transitive scoring over whole database
119 void TransitiveScoring();
120 void TransitiveScoring2();
121 void TransitiveScoring3();
122 void TransitiveScoring4();
124 // Score2Z transforms the -log(P-value) score into a Z-score for 0 < S
125 double Score2Z(double S);
127 // Z2Score transforms the Z-score into a -log(P-value) value
128 double Z2Score(double Z);
130 // Matrix manipulation
131 void PrintMatrix(float** V, int N);
132 void PrintMatrix(double** V, int N);
133 float NormalizationFactor(double** Csub,float* w, int M);
134 void Normalize(float* Ztq, char** fold, Hash<int>& excluded);
135 void InvertMatrix(double** B, double** A, int N);
136 void TransposeMatrix(double** V, int N);
137 void SVD(double **A, int n, double w[], double **V);