2 * This file is part of TISEAN
4 * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
6 * TISEAN is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * TISEAN is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with TISEAN; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*Author: Rainer Hegger. Last modified: Sep 7, 2004 */
25 #include "routines/tsa.h"
28 #define WID_STR "Estimates the average forecast error for a local\n\t\
29 constant fit as a function of the neighborhood size."
32 /*number of boxes for the neighbor search algorithm*/
/* File-scope settings. Most are overwritten by scan_options(); the defaults
   given here are the ones used when the matching option is absent. */
/* NOTE(review): NMAX is not defined in the visible lines -- confirm where it
   comes from. */
35 unsigned int nmax=(NMAX-1);
/* Flag bytes, all initially 0. Only dimset is visibly used (it is passed to
   get_multi_series() in main); where the others are set is not shown here. */
41 char eps0set=0,eps1set=0,causalset=0,dimset=0;
/* outfile: output file name; main() builds "<infile>.lm" or "stdin.lm" when it
   is still NULL. stdo: presumably selects writing to stdout -- TODO confirm. */
42 char *outfile=NULL,stdo=1;
/* dim,embed are read from -m as "dim,embed"; delay is read from -d. */
44 unsigned int dim=1,embed=2,delay=1;
/* Bit mask read from -V and tested against VER_INPUT in main(). */
45 unsigned int verbosity=0xff;
/* EPS0 (-r) and EPS1 (-R) bound the neighborhood-size loop in main();
   EPSF (-f) is the factor the size is multiplied by on each pass. */
47 double EPS0=1.e-3,EPS1=1.0,EPSF=1.2;
/* LENGTH (-l), exclude (-x), CLENGTH (-i), causal (-C). causal has no
   initializer here; as a file-scope object it starts at 0. */
48 unsigned long LENGTH=ULONG_MAX,exclude=0,CLENGTH=ULONG_MAX,causal;
/* Print the program banner (via what_i_do) followed by the usage text and the
   list of command-line options to stderr.
   progname: program name inserted into the banner and the usage line.
   NOTE(review): the fprintf that starts with "Everything not being..." is not
   closed in the visible text -- check the string continuation lines. */
51 void show_options(char *progname)
53 what_i_do(progname,WID_STR);
54 fprintf(stderr," Usage: %s [options]\n",progname);
55 fprintf(stderr," Options:\n");
56 fprintf(stderr,"Everything not being a valid option will be interpreted"
58 " datafile.\nIf no datafile is given stdin is read. Just - also"
60 fprintf(stderr,"\t-l # of data to use [default: whole file]\n");
61 fprintf(stderr,"\t-x # of lines to be ignored [default: 0]\n");
62 fprintf(stderr,"\t-c columns to read [default: 1,...,# of components]\n");
63 fprintf(stderr,"\t-m # of components,embedding dimension [default: 1,2]\n");
64 fprintf(stderr,"\t-d delay [default: 1]\n");
65 fprintf(stderr,"\t-i iterations [default: length]\n");
66 fprintf(stderr,"\t-r neighborhood size to start with [default:"
67 " (interval of data)/1000)]\n");
68 fprintf(stderr,"\t-R neighborhood size to end with [default:"
69 " interval of data]\n");
70 fprintf(stderr,"\t-f factor to increase size [default: 1.2]\n");
71 fprintf(stderr,"\t-s steps to forecast [default: 1]\n");
72 fprintf(stderr,"\t-C width of causality window [default: steps]\n");
73 fprintf(stderr,"\t-o output file name [default: 'datafile.lm']\n");
/* NOTE(review): the text below states a default of 1, while the verbosity
   variable is initialized to 0xff -- confirm which one is intended. */
74 fprintf(stderr,"\t-V verbosity level [default: 1]\n\t\t"
75 "0='only panic messages'\n\t\t"
76 "1='+ input/output messages'\n");
77 fprintf(stderr,"\t-h show these options\n");
/* Read the command-line options into the file-scope settings.
   n:  number of entries in in (called from main with argc).
   in: argument vector (called from main with argv).
   Each option is looked up with check_option(); a non-NULL result is handed to
   sscanf as the text to convert. Options that are absent leave the
   corresponding variable at its default.
   NOTE(review): no sscanf return value is checked, so unparsable text
   silently keeps the previous value.
   NOTE(review): several branches below open a brace whose remaining
   statements are not visible in this copy (-c, -r, -R, -C, -o). */
81 void scan_options(int n,char **in)
/* -l: number of data points to use. */
85 if ((out=check_option(in,n,'l','u')) != NULL)
86 sscanf(out,"%lu",&LENGTH);
/* -x: number of input lines to ignore. */
87 if ((out=check_option(in,n,'x','u')) != NULL)
88 sscanf(out,"%lu",&exclude);
/* -c: columns to read. */
89 if ((out=check_option(in,n,'c','s')) != NULL) {
/* -m: two comma-separated values, components then embedding dimension. */
93 if ((out=check_option(in,n,'m','2')) != NULL)
94 sscanf(out,"%u,%u",&dim,&embed);
/* -d: delay. */
95 if ((out=check_option(in,n,'d','u')) != NULL)
96 sscanf(out,"%u",&delay);
/* -i: iterations. */
97 if ((out=check_option(in,n,'i','u')) != NULL)
98 sscanf(out,"%lu",&CLENGTH);
/* -r: neighborhood size to start with. */
99 if ((out=check_option(in,n,'r','f')) != NULL) {
101 sscanf(out,"%lf",&EPS0);
/* -R: neighborhood size to end with. */
103 if ((out=check_option(in,n,'R','f')) != NULL) {
105 sscanf(out,"%lf",&EPS1);
/* -f: factor by which the size is increased. */
107 if ((out=check_option(in,n,'f','f')) != NULL)
108 sscanf(out,"%lf",&EPSF);
/* -s: steps to forecast.
   NOTE(review): STEP is declared outside the visible lines; %u requires it
   to be an unsigned int -- confirm. */
109 if ((out=check_option(in,n,'s','u')) != NULL)
110 sscanf(out,"%u",&STEP);
/* -C: width of the causality window. */
111 if ((out=check_option(in,n,'C','u')) != NULL) {
112 sscanf(out,"%lu",&causal);
/* -V: verbosity level. */
115 if ((out=check_option(in,n,'V','u')) != NULL)
116 sscanf(out,"%u",&verbosity);
/* -o: output file name. */
117 if ((out=check_option(in,n,'o','o')) != NULL) {
/* Accumulate the squared forecast error of a local average for one point.
   act:    index of the point being forecast; the reference value is
           series[i][act+STEP].
   number: how many entries of found[] to average over. found[0] is read
           unconditionally and the sum is divided by number, so number must
           be at least 1. main() only calls this when the neighbor count
           exceeds 2*(dim*embed+1).
   For every component i the forecast is the mean of si[found[j]+STEP] over
   j=0..number-1, and (forecast - actual)^2 is added to error[i].
   NOTE(review): the assignment of si is not visible in this copy --
   presumably si=series[i]; confirm. */
124 void make_fit(long act,unsigned long number)
129 for (i=0;i<dim;i++) {
/* Start the sum with the first neighbor, then add the remaining ones. */
131 cast=si[found[0]+STEP];
132 for (j=1;j<number;j++)
133 cast += si[found[j]+STEP];
134 cast /= (double)number;
135 error[i] += sqr(cast-series[i][act+STEP]);
139 int main(int argc,char **argv)
142 unsigned long actfound;
143 unsigned long *hfound;
145 unsigned long clength;
146 double interval,min,maxinterval;
149 double avfound,*hrms,*hav,sumerror=0.0;
152 if (scan_help(argc,argv))
153 show_options(argv[0]);
155 scan_options(argc,argv);
156 #ifndef OMIT_WHAT_I_DO
157 if (verbosity&VER_INPUT)
158 what_i_do(argv[0],WID_STR);
164 infile=search_datafile(argc,argv,NULL,verbosity);
168 if (outfile == NULL) {
170 check_alloc(outfile=(char*)calloc(strlen(infile)+4,(size_t)1));
171 sprintf(outfile,"%s.lm",infile);
174 check_alloc(outfile=(char*)calloc((size_t)9,(size_t)1));
175 sprintf(outfile,"stdin.lm");
179 test_outfile(outfile);
182 series=(double**)get_multi_series(infile,&LENGTH,exclude,&dim,"",dimset,
185 series=(double**)get_multi_series(infile,&LENGTH,exclude,&dim,column,
188 for (i=0;i<dim;i++) {
189 rescale_data(series[i],LENGTH,&min,&interval);
190 if (interval > maxinterval)
191 maxinterval=interval;
193 interval=maxinterval;
195 check_alloc(list=(long*)malloc(sizeof(long)*LENGTH));
196 check_alloc(found=(unsigned long*)malloc(sizeof(long)*LENGTH));
197 check_alloc(hfound=(unsigned long*)malloc(sizeof(long)*LENGTH));
198 check_alloc(box=(long**)malloc(sizeof(long*)*NMAX));
200 check_alloc(box[i]=(long*)malloc(sizeof(long)*NMAX));
201 check_alloc(error=(double*)malloc(sizeof(double)*dim));
202 check_alloc(hrms=(double*)malloc(sizeof(double)*dim));
203 check_alloc(hav=(double*)malloc(sizeof(double)*dim));
204 check_alloc(hser=(double**)malloc(sizeof(double*)*dim));
211 clength=(CLENGTH <= LENGTH) ? CLENGTH-STEP : LENGTH-STEP;
214 file=fopen(outfile,"w");
215 if (verbosity&VER_INPUT)
216 fprintf(stderr,"Opened %s for writing\n",outfile);
217 fprintf(file,"#1. size 2. relative forecast error 3. fraction of points\n"
218 "#4. av neighbors found 5. absolute variance of the points\n");
221 if (verbosity&VER_INPUT)
222 fprintf(stderr,"Writing to stdout\n");
225 for (epsilon=EPS0;epsilon<EPS1*EPSF;epsilon*=EPSF) {
228 error[i]=hrms[i]=hav[i]=0.0;
230 make_multi_box(series,box,list,LENGTH-STEP,NMAX,dim,
231 embed,delay,epsilon);
232 for (i=(embed-1)*delay;i<clength;i++) {
235 actfound=find_multi_neighbors(series,box,list,hser,LENGTH,
236 NMAX,dim,embed,delay,epsilon,hfound);
237 actfound=exclude_interval(actfound,i-causal+1,i+causal+(embed-1)*delay-1,
239 if (actfound > 2*(dim*embed+1)) {
240 make_fit(i,actfound);
242 avfound += (double)(actfound-1);
243 for (j=0;j<dim;j++) {
244 hrms[j] += series[j][i+STEP]*series[j][i+STEP];
245 hav[j] += series[j][i+STEP];
251 for (j=0;j<dim;j++) {
253 hrms[j]=sqrt(fabs(hrms[j]/(pfound-1)-hav[j]*hav[j]*pfound/(pfound-1)));
254 error[j]=sqrt(error[j]/pfound)/hrms[j];
255 sumerror += error[j];
260 fprintf(stdout,"%e %e ",epsilon*interval,sumerror/(double)dim);
262 fprintf(stdout,"%e ",error[j]);
263 fprintf(stdout,"%e %e\n",(double)pfound/(clength-(embed-1)*delay),
270 fprintf(file,"%e %e ",epsilon*interval,sumerror/(double)dim);
272 fprintf(file,"%e ",error[j]);
273 fprintf(file,"%e %e\n",(double)pfound/(clength-(embed-1)*delay),