2 * This file is part of TISEAN
4 * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
6 * TISEAN is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * TISEAN is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with TISEAN; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*Author: Rainer Hegger */
22 Sep 8, 2006: Add -o functionality
23 Sep 7, 2006: Completely rewritten to handle multivariate data
29 #include "routines/tsa.h"
32 #define WID_STR "Estimates the average forecast error of a local\n\t\
36 /*number of boxes for the neighbor search algorithm*/
/* nmax (= NMAX-1) is AND-ed onto box coordinates in put_in_boxes() and
 * hfind_neighbors(). hdim is set to (EMBED-1)*DELAY in main. indexes receives
 * the result of make_multi_index(COMP,EMBED,DELAY). comp1 selects the series
 * used for the second box coordinate (its assignment is not visible here). */
39 unsigned int nmax=(NMAX-1),comp1,hdim,**indexes;
/* Index buffers, LENGTH entries each (allocated in main). hfind_neighbors()
 * appends candidate indices to hfound; main hands both buffers to
 * exclude_interval(). */
41 unsigned long *found,*hfound;
/* Work arrays of the local fit (see make_fit): mat is DIM x DIM, imat is the
 * result of invert_matrix(mat,DIM), vec and localav have DIM entries,
 * foreav has COMP entries. */
44 double **mat,**imat,*vec,*localav,*foreav;
/* Flags; presumably raised when -r / -C are given -- the assignments are not
 * visible in this view, TODO confirm. */
46 char epsset=0,causalset=0;
/* Bit mask of VER_* flags, overridden by -V. */
47 unsigned int verbosity=VER_INPUT|VER_FIRST_LINE;
/* COMP,EMBED: set by -m; DELAY: -d; MINN: -k; STEP: -s.
 * DIM is the loop bound of the fit (its assignment is not visible here). */
48 unsigned int COMP=1,EMBED=2,DIM,DELAY=1,MINN=30,STEP=1;
/* EPS0: set by -r; EPSF: set by -f. */
49 double EPS0=1.e-3,EPSF=1.2;
/* LENGTH: -l; exclude: -x; CLENGTH: -n; causal: -C. */
50 unsigned long LENGTH=ULONG_MAX,exclude=0,CLENGTH=ULONG_MAX,causal;
/* infile comes from search_datafile() in main; COLUMN and outfile belong to
 * the -c and -o options (their assignments are not visible here). */
51 char *infile=NULL,*COLUMN=NULL,*outfile=NULL;
/* dimset is forwarded to get_multi_series(); stout presumably selects stdout
 * versus file output -- TODO confirm, its use is not visible here. */
52 char dimset=0,stout=1;
/* show_options: print the program description (via what_i_do) followed by
 * the usage line and the list of command-line options to stderr.
 *
 * progname: program name inserted into the usage line and passed on to
 *           what_i_do().
 *
 * NOTE(review): several lines of this function are missing from this view
 * (e.g. the tail of the "datafile" sentence and of the -k entry), so the
 * text below is not the complete help output. */
54 void show_options(char *progname)
56 what_i_do(progname,WID_STR);
57 fprintf(stderr," Usage: %s [options]\n",progname);
58 fprintf(stderr," Options:\n");
59 fprintf(stderr,"Everything not being a valid option will be interpreted"
61 " datafile.\nIf no datafile is given stdin is read. Just - also"
/* One entry per option; the flags match those parsed in scan_options(). */
63 fprintf(stderr,"\t-l # of data to use [default: whole file]\n");
64 fprintf(stderr,"\t-x # of lines to be ignored [default: 0]\n");
65 fprintf(stderr,"\t-c columns to read [default: 1]\n");
66 fprintf(stderr,"\t-m # of components, embedding dimension "
67 "[default: %u,%u]\n",COMP,EMBED);
68 fprintf(stderr,"\t-d delay [default: 1]\n");
69 fprintf(stderr,"\t-n iterations [default: length]\n");
70 fprintf(stderr,"\t-k minimal number of neighbors for the fit "
72 fprintf(stderr,"\t-r neighborhoud size to start with "
73 "[default: (data interval)/1000]\n");
74 fprintf(stderr,"\t-f factor to increase size [default: 1.2]\n");
75 fprintf(stderr,"\t-s steps to forecast [default: 1]\n");
76 fprintf(stderr,"\t-C width of causality window [default: steps]\n");
77 fprintf(stderr,"\t-o output file [default 'datafile'.fce"
78 " no -o means write to stdout]\n");
79 fprintf(stderr,"\t-V verbosity level [default: 1]\n\t\t"
80 "0='only panic messages'\n\t\t"
81 "1='+ input/output messages'\n\t\t"
82 "2='+ print indiviual forecast errors'\n");
83 fprintf(stderr,"\t-h show these options\n");
/* scan_options: read the command-line options into the file-scope
 * parameters.
 *
 * n:  number of entries in `in` (forwarded to check_option).
 * in: argument vector (forwarded to check_option).
 *
 * Each option is looked up with check_option(in,n,flag,type); when the
 * result is non-NULL it is parsed with sscanf into the matching global.
 * The sscanf return values are not checked, so a malformed value leaves the
 * global at its previous value.
 *
 * NOTE(review): the bodies of the -c, -r, -C and -o branches are only
 * partly visible in this view. */
87 void scan_options(int n,char **in)
/* -l: number of data points to use */
91 if ((out=check_option(in,n,'l','u')) != NULL)
92 sscanf(out,"%lu",&LENGTH);
/* -x: number of input lines to ignore */
93 if ((out=check_option(in,n,'x','u')) != NULL)
94 sscanf(out,"%lu",&exclude);
/* -c: columns to read (branch body not visible here) */
95 if ((out=check_option(in,n,'c','s')) != NULL) {
/* -m: number of components and embedding dimension, given as "COMP,EMBED" */
99 if ((out=check_option(in,n,'m','2')) != NULL)
100 sscanf(out,"%u,%u",&COMP,&EMBED);
/* -d: delay */
101 if ((out=check_option(in,n,'d','u')) != NULL)
102 sscanf(out,"%u",&DELAY);
/* -n: number of iterations */
103 if ((out=check_option(in,n,'n','u')) != NULL)
104 sscanf(out,"%lu",&CLENGTH);
/* -V: verbosity bit mask */
105 if ((out=check_option(in,n,'V','u')) != NULL)
106 sscanf(out,"%u",&verbosity);
/* -k: minimal number of neighbors for the fit */
107 if ((out=check_option(in,n,'k','u')) != NULL)
108 sscanf(out,"%u",&MINN);
/* -r: initial neighborhood size */
109 if ((out=check_option(in,n,'r','f')) != NULL) {
111 sscanf(out,"%lf",&EPS0);
/* -f: factor by which the neighborhood size is increased */
113 if ((out=check_option(in,n,'f','f')) != NULL)
114 sscanf(out,"%lf",&EPSF);
/* -s: number of steps to forecast */
115 if ((out=check_option(in,n,'s','u')) != NULL)
116 sscanf(out,"%u",&STEP);
/* -C: width of the causality window */
117 if ((out=check_option(in,n,'C','u')) != NULL) {
118 sscanf(out,"%lu",&causal);
/* -o: output file (branch body not visible here) */
121 if ((out=check_option(in,n,'o','o')) != NULL) {
/* put_in_boxes: walk over the points n = hdim .. LENGTH-STEP-1 and compute
 * a two-dimensional box index (i,j) for each of them.
 *
 * NOTE(review): only the loop header and the index computation are visible
 * in this view; the statements that store n in box/list are presumably in
 * the missing lines -- TODO confirm. */
128 void put_in_boxes(void)
138 for (n=hdim;n<LENGTH-STEP;n++) {
/* Box coordinates: scale by epsinv, truncate to int, then mask with nmax.
 * i uses series[0] at time n, j uses series[comp1] at time n-hdim. */
139 i=(int)(series[0][n]*epsinv)&nmax;
140 j=(int)(series[comp1][n-hdim]*epsinv)&nmax;
/* hfind_neighbors: collect candidate neighbors of the point `act` from the
 * box grid and append their indices to hfound.
 *
 * act: time index of the reference point.
 *
 * The 3x3 block of boxes around the box of `act` is scanned. Each box holds
 * the head of a chain that is followed through list[] until -1 is reached.
 * For every chained element the maximum over k < DIM of the absolute
 * coordinate differences to `act` is computed.
 *
 * NOTE(review): the acceptance test on that distance and the return
 * statement are not visible in this view; presumably nfound is returned
 * -- TODO confirm. */
146 unsigned int hfind_neighbors(unsigned long act)
149 int i,j,i1,i2,j1,k,element;
150 unsigned long nfound=0;
151 unsigned int hcomp,hdel;
152 double max,dx,epsinv;
/* Box of the reference point, computed the same way as in put_in_boxes(). */
156 i=(int)(series[0][act]*epsinv)&nmax;
157 j=(int)(series[comp1][act-hdim]*epsinv)&nmax;
/* Visit the neighboring boxes; j1 is masked with nmax when indexing. */
159 for (i1=i-1;i1<=i+1;i1++) {
161 for (j1=j-1;j1<=j+1;j1++) {
162 element=box[i2][j1&nmax];
/* -1 marks the end of a box's chain. */
163 while (element != -1) {
/* Running maximum of the per-coordinate distances. */
166 for (k=0;k<DIM;k += 1) {
169 dx=fabs(series[hcomp][element-hdel]-series[hcomp][act-hdel]);
170 max=(dx>max) ? dx : max;
/* Record the element and advance to the next one in the chain. */
179 hfound[nfound++]=element;
180 element=list[element];
/* multiply_matrix: form the matrix-vector product of a DIM x DIM matrix
 * with a DIM-vector.
 *
 * mat: matrix, indexed as mat[i][j] with i,j < DIM.
 * vec: input vector of DIM entries.
 *
 * The product is accumulated in a freshly allocated temporary hvec.
 * make_fit() reads vec right after calling this function, so the result is
 * presumably copied back into vec and hvec released in lines missing from
 * this view -- TODO confirm. */
187 void multiply_matrix(double **mat,double *vec)
192 check_alloc(hvec=(double*)malloc(sizeof(double)*DIM));
193 for (i=0;i<DIM;i++) {
196 hvec[i] += mat[i][j]*vec[j];
/* make_fit: local linear fit around the point `act`, producing a forecast
 * for every component.
 *
 * number:  number of neighbors used in the sums; also the divisor of the
 *          local average.
 * act:     time index of the point whose future is forecast.
 * newcast: output array; entries 0 .. COMP-1 are written.
 *
 * NOTE(review): `which` (the index of the n-th neighbor) and the pointers
 * si/sj and offsets hdi/hdj/hcj are assigned in lines missing from this
 * view; presumably they come from found[] and indexes[] -- TODO confirm. */
203 void make_fit(int number,unsigned long act,double *newcast)
205 double *sj,*si,lavi,lavj,fav;
206 unsigned int hci,hdi,hcj,hdj;
/* Accumulate, over all neighbors, the values STEP steps ahead (foreav) and
 * the current delay coordinates (localav). */
214 for (n=0;n<number;n++) {
217 foreav[j] += series[j][which+STEP];
218 for (j=0;j<DIM;j++) {
221 localav[j] += series[hcj][which-hdj];
/* Turn the sum into an average. */
226 localav[i] /= number;
/* Build the matrix of products of deviations from the local average.
 * Only j >= i is computed; the other half is filled by symmetry. */
230 for (i=0;i<DIM;i++) {
235 for (j=i;j<DIM;j++) {
241 for (n=0;n<number;n++) {
243 mat[i][j] += (si[which-hdi]-lavi)*(sj[which-hdj]-lavj);
246 mat[j][i] = mat[i][j];
250 imat=invert_matrix(mat,DIM);
/* For each component: build the right-hand side vec from the future values
 * (minus fav) times the delay coordinates, then multiply by the inverse. */
252 for (i=0;i<COMP;i++) {
255 for (j=0;j<DIM;j++) {
261 for (n=0;n<number;n++) {
263 vec[j] += (si[which+STEP]-fav)*(sj[which-hdj]);
268 multiply_matrix(imat,vec);
/* Forecast = average future value + linear correction from the deviation
 * of the point `act` from the local average. */
270 newcast[i]=foreav[i];
271 for (j=0;j<DIM;j++) {
274 newcast[i] += vec[j]*(series[hcj][act-hdj]-localav[j]);
284 int main(int argc,char **argv)
286 char stin=0,alldone,*done;
288 unsigned long actfound;
289 unsigned long clength;
290 double *rms,*av,*min,*interval,maxinterval,norm;
291 double *error,**individual=NULL;
295 if (scan_help(argc,argv))
296 show_options(argv[0]);
298 scan_options(argc,argv);
303 #ifndef OMIT_WHAT_I_DO
304 if (verbosity&VER_INPUT)
305 what_i_do(argv[0],WID_STR);
308 infile=search_datafile(argc,argv,NULL,verbosity);
312 if (outfile == NULL) {
314 check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
315 strcpy(outfile,infile);
316 strcat(outfile,".fce");
319 check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
320 strcpy(outfile,"stdin.fce");
324 test_outfile(outfile);
327 series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,"",dimset,
330 series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,COLUMN,
333 if ((LENGTH-(EMBED-1)*DELAY) < MINN) {
334 fprintf(stderr,"Data set is too short to find enough neighbors "
335 "for the fit! Exiting!\n");
336 exit(ONESTEP_TOO_FEW_POINTS);
340 check_alloc(min=(double*)malloc(sizeof(double)*COMP));
341 check_alloc(interval=(double*)malloc(sizeof(double)*COMP));
342 check_alloc(av=(double*)malloc(sizeof(double)*COMP));
343 check_alloc(rms=(double*)malloc(sizeof(double)*COMP));
346 for (i=0;i<COMP;i++) {
347 rescale_data(series[i],LENGTH,&min[i],&interval[i]);
348 maxinterval=(maxinterval<interval[i])?interval[i]:maxinterval;
349 variance(series[i],LENGTH,&av[i],&rms[i]);
352 if (verbosity&VER_USR1) {
353 check_alloc(individual=(double**)malloc(sizeof(double*)*COMP));
354 for (j=0;j<COMP;j++) {
355 check_alloc(individual[j]=(double*)malloc(sizeof(double)*LENGTH));
356 for (i=0;i<LENGTH;i++)
357 individual[j][i]=0.0;
361 check_alloc(list=(long*)malloc(sizeof(long)*LENGTH));
362 check_alloc(found=(unsigned long*)malloc(sizeof(long)*LENGTH));
363 check_alloc(hfound=(unsigned long*)malloc(sizeof(long)*LENGTH));
364 check_alloc(done=(char*)malloc(sizeof(char)*LENGTH));
365 check_alloc(box=(long**)malloc(sizeof(long*)*NMAX));
367 check_alloc(box[i]=(long*)malloc(sizeof(long)*NMAX));
369 for (i=0;i<LENGTH;i++)
377 clength=(CLENGTH <= LENGTH) ? CLENGTH-STEP : LENGTH-STEP;
379 indexes=make_multi_index(COMP,EMBED,DELAY);
381 hdim=(EMBED-1)*DELAY;
382 check_alloc(newcast=(double*)malloc(sizeof(double)*COMP));
385 check_alloc(localav=(double*)malloc(sizeof(double)*DIM));
386 check_alloc(foreav=(double*)malloc(sizeof(double)*COMP));
387 check_alloc(vec=(double*)malloc(sizeof(double)*DIM));
388 check_alloc(mat=(double**)malloc(sizeof(double*)*DIM));
390 check_alloc(mat[i]=(double*)malloc(sizeof(double)*DIM));
392 check_alloc(error=(double*)malloc(sizeof(double)*COMP));
400 for (i=(EMBED-1)*DELAY;i<clength;i++)
402 actfound=hfind_neighbors(i);
403 actfound=exclude_interval(actfound,i-causal+1,
404 i+causal+(EMBED-1)*DELAY-1,hfound,found);
405 if (actfound > MINN) {
406 make_fit(actfound,i,newcast);
408 error[j] += sqr(newcast[j]-series[j][i+STEP]);
409 if (verbosity&VER_USR1) {
411 individual[j][i]=(newcast[j]-series[j][i+STEP])*interval[j];
418 norm=((double)clength-(double)((EMBED-1)*DELAY));
420 if (verbosity&VER_USR1) {
421 fprintf(stdout,"#Relative forecast errors for each component:\n");
423 fprintf(stdout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
425 for (i=(EMBED-1)*DELAY;i<clength;i++) {
426 for (j=0;j<COMP-1;j++)
427 fprintf(stdout,"%e ",individual[j][i]);
428 fprintf(stdout,"%e\n",individual[COMP-1][i]);
432 fprintf(stdout,"#Relative forecast errors for each component:\n");
434 fprintf(stdout,"%e\n",sqrt(error[i]/norm)/rms[i]);
438 fout=fopen(outfile,"w");
439 if (verbosity&VER_INPUT)
440 fprintf(stderr,"Opened %s for writing\n",outfile);
441 if (verbosity&VER_USR1) {
442 fprintf(fout,"#Relative forecast errors for each component:\n");
444 fprintf(fout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
446 for (i=(EMBED-1)*DELAY;i<clength;i++) {
447 for (j=0;j<COMP-1;j++)
448 fprintf(fout,"%e ",individual[j][i]);
449 fprintf(fout,"%e\n",individual[COMP-1][i]);
453 fprintf(fout,"#Relative forecast errors for each component:\n");
455 fprintf(fout,"%e\n",sqrt(error[i]/norm)/rms[i]);