2 * This file is part of TISEAN
4 * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
6 * TISEAN is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * TISEAN is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with TISEAN; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*Author: Rainer Hegger Last modified: Jun 10, 2006 */
26 #include "routines/tsa.h"
28 #define WID_STR "Multivariate noise reduction using the GHKSS algorithm"
31 #define BOX (unsigned int)1024
33 unsigned long length=ULONG_MAX,exclude=0;
34 unsigned int dim,qdim=2,delay=1,minn=50,iterations=1,comp=1,embed=5;
35 unsigned int verbosity=0xff;
38 char eps_set=0,euclidean=0,dimset=0,resize_eps;
39 char *outfile=NULL,stdo=1;
42 double *d_min,*d_max,d_max_max;
43 double **series,**delta,**corr;
48 unsigned int ibox=BOX-1;
49 unsigned int *index_comp,*index_embed;
51 /*these are global to save time*/
53 double *av,**mat,*matarray,*eig;
55 void show_options(char *progname)
57 what_i_do(progname,WID_STR);
58 fprintf(stderr,"Usage: %s [options]\n",progname);
59 fprintf(stderr,"Options:\n");
60 fprintf(stderr,"Everything not being a valid option will be interpreted"
62 " datafile.\nIf no datafile is given stdin is read. Just - also"
64 fprintf(stderr,"\t-l # of data to use [Default: whole file]\n");
65 fprintf(stderr,"\t-x # of lines to be ignored [Default: 0]\n");
66 fprintf(stderr,"\t-c column to read [Default: 1,..,# of components]\n");
67 fprintf(stderr,"\t-m # of components,embedding dimension [Default: 1,5]\n");
68 fprintf(stderr,"\t-d delay [Default: 1]\n");
69 fprintf(stderr,"\t-q dimension to project to [Default: 2]\n");
70 fprintf(stderr,"\t-k minimal number of neighbours [Default: 50]\n");
71 fprintf(stderr,"\t-r minimal neighbourhood size \n\t\t"
72 "[Default: (interval of data)/1000]\n");
73 fprintf(stderr,"\t-i # of iterations [Default: 1]\n");
74 fprintf(stderr,"\t-2 use euklidean metric [Default: non euklidean]\n");
75 fprintf(stderr,"\t-o name of output file \n\t\t"
76 "[Default: 'datafile'.opt.n, where n is the iteration.\n\t\t"
77 " If no -o is given, the last iteration is also"
78 " written to stdout]\n");
79 fprintf(stderr,"\t-V verbosity level [Default: 7]\n\t\t"
80 "0='only panic messages'\n\t\t"
81 "1='+ input/output messages'\n\t\t"
82 "2='+ average correction and trend'\n\t\t"
83 "4='+ how many points for which epsilon'\n");
84 fprintf(stderr,"\t-h show these options\n");
88 void scan_options(int n,char **in)
92 if ((out=check_option(in,n,'l','u')) != NULL)
93 sscanf(out,"%lu",&length);
94 if ((out=check_option(in,n,'x','u')) != NULL)
95 sscanf(out,"%lu",&exclude);
96 if ((out=check_option(in,n,'c','s')) != NULL) {
100 if ((out=check_option(in,n,'m','2')) != NULL)
101 sscanf(out,"%u,%u",&comp,&embed);
102 if ((out=check_option(in,n,'d','u')) != NULL)
103 sscanf(out,"%u",&delay);
104 if ((out=check_option(in,n,'q','u')) != NULL)
105 sscanf(out,"%u",&qdim);
106 if ((out=check_option(in,n,'k','u')) != NULL)
107 sscanf(out,"%u",&minn);
108 if ((out=check_option(in,n,'r','f')) != NULL) {
110 sscanf(out,"%lf",&mineps);
112 if ((out=check_option(in,n,'i','u')) != NULL)
113 sscanf(out,"%u",&iterations);
114 if ((out=check_option(in,n,'V','u')) != NULL)
115 sscanf(out,"%u",&verbosity);
116 if ((out=check_option(in,n,'2','n')) != NULL)
118 if ((out=check_option(in,n,'o','o')) != NULL) {
125 void sort(double *x,int *n)
133 for (i=0;i<dim-1;i++)
134 for (j=i+1;j<dim;j++)
154 for (i=emb_offset;i<length;i++) {
155 x=(int)(series[0][i]*ieps)&ibox;
156 y=(int)(series[comp-1][i-emb_offset]*ieps)&ibox;
162 unsigned long fmn(long which,double eps)
165 long i,i1,i2,j,j1,k,k1,li;
169 i=(int)(series[0][which]/eps)&ibox;
170 j=(int)(series[comp-1][which-emb_offset]/eps)&ibox;
172 for (i1=i-1;i1<=i+1;i1++) {
174 for (j1=j-1;j1<=j+1;j1++) {
175 element=box[i2][j1&ibox];
176 while (element != -1) {
177 for (k=0;k<embed;k++) {
179 for (li=0;li<comp;li++) {
180 dx=fabs(series[li][which+k1]-series[li][element+k1]);
189 element=list[element];
196 void make_correction(unsigned long n,unsigned long nf)
198 long i,i1,i2,j,j1,j2,k,k1,k2,hs;
201 for (i=0;i<dim;i++) {
206 help += series[i1][flist[j]-i2];
210 for (i=0;i<dim;i++) {
213 for (j=i;j<dim;j++) {
219 help += series[i1][hs-i2]*series[j1][hs-j2];
221 mat[i][j]=(help/nf-av[i]*av[j])*metric[i]*metric[j];
226 eigen(mat,(unsigned long)dim,eig);
229 for (i=0;i<dim;i++) {
231 for (j=qdim;j<dim;j++) {
233 for (k=0;k<dim;k++) {
236 help += (series[k1][n-k2]-av[k])*mat[k][hs]*mat[i][hs]*metric[k];
239 corr[n][i]=help/metric[i];
243 void handle_trend(unsigned long n,unsigned long nf)
248 for (i=0;i<dim;i++) {
251 help += corr[flist[j]][i];
255 for (i=0;i<dim;i++) {
258 delta[i1][n-i2] += (corr[n][i]-av[i])/(trace*metric[i]);
262 void set_correction(void)
265 double *hav,*hsigma,help;
267 check_alloc(hav=(double*)malloc(sizeof(double)*comp));
268 check_alloc(hsigma=(double*)malloc(sizeof(double)*comp));
270 hav[j]=hsigma[j]=0.0;
272 for (i=0;i<length;i++)
273 for (j=0;j<comp;j++) {
274 hav[j] += (help=delta[j][i]);
275 hsigma[j] += help*help;
278 for (j=0;j<comp;j++) {
280 hsigma[j]=sqrt(fabs(hsigma[j]/length-hav[j]*hav[j]));
282 if (verbosity&(VER_USR1|VER_USR2)) {
283 for (i=0;i<comp;i++) {
284 fprintf(stderr,"Average shift of component %ld = %e\n",i+1,
286 fprintf(stderr,"Average rms correction of comp. %ld = %e\n\n",
287 i+1,hsigma[i]*d_max[i]);
290 for (i=0;i<length;i++)
292 series[j][i] -= delta[j][i];
296 if (verbosity&VER_USR2)
297 fprintf(stderr,"Reset minimal neighbourhood size to %e\n",
306 int main(int argc,char **argv)
309 int iter,epscount,*ok;
313 unsigned long nfound,n,allfound;
318 if (scan_help(argc,argv))
319 show_options(argv[0]);
321 scan_options(argc,argv);
322 #ifndef OMIT_WHAT_I_DO
323 if (verbosity&VER_INPUT)
324 what_i_do(argv[0],WID_STR);
328 emb_offset=(embed-1)*delay;
330 infile=search_datafile(argc,argv,NULL,verbosity);
334 if (outfile == NULL) {
336 check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
337 check_alloc(ofname=(char*)calloc(strlen(infile)+9,(size_t)1));
338 sprintf(outfile,"%s.opt",infile);
341 check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
342 check_alloc(ofname=(char*)calloc((size_t)14,(size_t)1));
343 sprintf(outfile,"stdin.opt");
347 check_alloc(ofname=(char*)calloc(strlen(outfile)+10,(size_t)1));
350 series=(double**)get_multi_series(infile,&length,exclude,&comp,"",
353 series=(double**)get_multi_series(infile,&length,exclude,&comp,column,
357 fprintf(stderr,"With %lu data you will never find %u neighbors."
358 " Exiting!\n",length,minn);
359 exit(GHKSS__TOO_MANY_NEIGHBORS);
362 check_alloc(d_min=(double*)malloc(sizeof(double)*comp));
363 check_alloc(d_max=(double*)malloc(sizeof(double)*comp));
365 for (i=0;i<comp;i++) {
366 rescale_data(series[i],length,&d_min[i],&d_max[i]);
367 if (d_max[i] > d_max_max)
377 check_alloc(box=(long**)malloc(sizeof(long*)*BOX));
379 check_alloc(box[i]=(long*)malloc(sizeof(long)*BOX));
381 check_alloc(list=(long*)malloc(sizeof(long)*length));
382 check_alloc(flist=(unsigned long*)malloc(sizeof(long)*length));
384 check_alloc(metric=(double*)malloc(sizeof(double)*dim));
387 for (i=0;i<dim;i++) {
389 trace += 1./metric[i];
393 for (i=0;i<dim;i++) {
394 if ((i >= comp) && (i < ((long)dim-(long)comp)))
398 trace += 1./metric[i];
402 check_alloc(corr=(double**)malloc(sizeof(double*)*length));
403 for (i=0;i<length;i++)
404 check_alloc(corr[i]=(double*)malloc(sizeof(double)*dim));
405 check_alloc(ok=(int*)malloc(sizeof(int)*length));
406 check_alloc(delta=(double**)malloc(sizeof(double*)*comp));
408 check_alloc(delta[i]=(double*)malloc(sizeof(double)*length));
409 check_alloc(index_comp=(unsigned int*)malloc(sizeof(int)*dim));
410 check_alloc(index_embed=(unsigned int*)malloc(sizeof(int)*dim));
411 check_alloc(av=(double*)malloc(sizeof(double)*dim));
412 check_alloc(sorted=(int*)malloc(sizeof(int)*dim));
413 check_alloc(eig=(double*)malloc(sizeof(double)*dim));
414 check_alloc(matarray=(double*)malloc(sizeof(double)*dim*dim));
415 check_alloc(mat=(double**)malloc(sizeof(double*)*dim));
417 mat[i]=(double*)(matarray+dim*i);
418 check_alloc(hser=(double**)malloc(sizeof(double*)*comp));
420 for (i=0;i<dim;i++) {
421 index_comp[i]=i%comp;
422 index_embed[i]=(i/comp)*delay;
426 for (iter=1;iter<=iterations;iter++) {
427 for (i=0;i<length;i++) {
438 if (verbosity&(VER_USR1|VER_USR2))
439 fprintf(stderr,"Starting iteration %d\n",iter);
443 for (n=emb_offset;n<length;n++)
445 nfound=fmn(n,epsilon);
446 if (nfound >= minn) {
447 make_correction(n,nfound);
456 if (verbosity&VER_USR2)
457 fprintf(stderr,"Corrected %ld points with epsilon= %e\n",allfound,
462 if (verbosity&VER_USR2)
463 fprintf(stderr,"Start evaluating the trend\n");
467 for (i=1;i<epscount;i++) {
469 for (n=emb_offset;n<length;n++)
471 nfound=fmn(n,epsilon);
472 handle_trend(n,nfound);
475 if (verbosity&VER_USR2)
476 fprintf(stderr,"Trend subtracted for %ld points with epsilon= %e\n",
477 allfound,epsilon*d_max_max);
482 sprintf(ofname,"%s.%d",outfile,iter);
483 test_outfile(ofname);
485 file=fopen(ofname,"w");
486 if (verbosity&VER_INPUT)
487 fprintf(stderr,"Opened %s for writing\n\n",ofname);
488 for (i=0;i<length;i++) {
489 for (j=0;j<comp;j++) {
490 fprintf(file,"%e ",series[j][i]*d_max[j]+d_min[j]);
493 if (stdo && (iter == iterations)) {
495 fprintf(stdout,"%e ",series[j][i]*d_max[j]+d_min[j]);
496 fprintf(stdout,"\n");