--- /dev/null
+/*
+ * This file is part of TISEAN
+ *
+ * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
+ *
+ * TISEAN is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * TISEAN is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with TISEAN; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/*Author: Rainer Hegger */
+/*Changes:
+ Sep 8, 2006: Add -o functionality
+ Sep 7, 2006: Completely rewritten to handle multivariate data
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "routines/tsa.h"
+#include <math.h>
+
+#define WID_STR "Estimates the average forecast error of a local\n\t\
+linear fit"
+
+
+/*number of boxes for the neighbor search algorithm*/
+#define NMAX 512
+
+unsigned int nmax=(NMAX-1),comp1,hdim,**indexes;
+long **box,*list;
+unsigned long *found,*hfound;
+double **series;
+double epsilon;
+double **mat,**imat,*vec,*localav,*foreav;
+
+char epsset=0,causalset=0;
+unsigned int verbosity=VER_INPUT|VER_FIRST_LINE;
+unsigned int COMP=1,EMBED=2,DIM,DELAY=1,MINN=30,STEP=1;
+double EPS0=1.e-3,EPSF=1.2;
+unsigned long LENGTH=ULONG_MAX,exclude=0,CLENGTH=ULONG_MAX,causal;
+char *infile=NULL,*COLUMN=NULL,*outfile=NULL;
+char dimset=0,stout=1;
+
+void show_options(char *progname)
+{
+ what_i_do(progname,WID_STR);
+ fprintf(stderr," Usage: %s [options]\n",progname);
+ fprintf(stderr," Options:\n");
+ fprintf(stderr,"Everything not being a valid option will be interpreted"
+ " as a possible"
+ " datafile.\nIf no datafile is given stdin is read. Just - also"
+ " means stdin\n");
+ fprintf(stderr,"\t-l # of data to use [default: whole file]\n");
+ fprintf(stderr,"\t-x # of lines to be ignored [default: 0]\n");
+ fprintf(stderr,"\t-c columns to read [default: 1]\n");
+ fprintf(stderr,"\t-m # of components, embedding dimension "
+ "[default: %u,%u]\n",COMP,EMBED);
+ fprintf(stderr,"\t-d delay [default: 1]\n");
+ fprintf(stderr,"\t-n iterations [default: length]\n");
+ fprintf(stderr,"\t-k minimal number of neighbors for the fit "
+ "[default: 30]\n");
+ fprintf(stderr,"\t-r neighborhoud size to start with "
+ "[default: (data interval)/1000]\n");
+ fprintf(stderr,"\t-f factor to increase size [default: 1.2]\n");
+ fprintf(stderr,"\t-s steps to forecast [default: 1]\n");
+ fprintf(stderr,"\t-C width of causality window [default: steps]\n");
+ fprintf(stderr,"\t-o output file [default 'datafile'.fce"
+ " no -o means write to stdout]\n");
+ fprintf(stderr,"\t-V verbosity level [default: 1]\n\t\t"
+ "0='only panic messages'\n\t\t"
+ "1='+ input/output messages'\n\t\t"
+ "2='+ print indiviual forecast errors'\n");
+ fprintf(stderr,"\t-h show these options\n");
+ exit(0);
+}
+
+void scan_options(int n,char **in)
+{
+ char *out;
+
+ if ((out=check_option(in,n,'l','u')) != NULL)
+ sscanf(out,"%lu",&LENGTH);
+ if ((out=check_option(in,n,'x','u')) != NULL)
+ sscanf(out,"%lu",&exclude);
+ if ((out=check_option(in,n,'c','s')) != NULL) {
+ COLUMN=out;
+ dimset=1;
+ }
+ if ((out=check_option(in,n,'m','2')) != NULL)
+ sscanf(out,"%u,%u",&COMP,&EMBED);
+ if ((out=check_option(in,n,'d','u')) != NULL)
+ sscanf(out,"%u",&DELAY);
+ if ((out=check_option(in,n,'n','u')) != NULL)
+ sscanf(out,"%lu",&CLENGTH);
+ if ((out=check_option(in,n,'V','u')) != NULL)
+ sscanf(out,"%u",&verbosity);
+ if ((out=check_option(in,n,'k','u')) != NULL)
+ sscanf(out,"%u",&MINN);
+ if ((out=check_option(in,n,'r','f')) != NULL) {
+ epsset=1;
+ sscanf(out,"%lf",&EPS0);
+ }
+ if ((out=check_option(in,n,'f','f')) != NULL)
+ sscanf(out,"%lf",&EPSF);
+ if ((out=check_option(in,n,'s','u')) != NULL)
+ sscanf(out,"%u",&STEP);
+ if ((out=check_option(in,n,'C','u')) != NULL) {
+ sscanf(out,"%lu",&causal);
+ causalset=1;
+ }
+ if ((out=check_option(in,n,'o','o')) != NULL) {
+ stout=0;
+ if (strlen(out) > 0)
+ outfile=out;
+ }
+}
+
+void put_in_boxes(void)
+{
+ int i,j,n;
+ double epsinv;
+
+ epsinv=1.0/epsilon;
+ for (i=0;i<NMAX;i++)
+ for (j=0;j<NMAX;j++)
+ box[i][j]= -1;
+
+ for (n=hdim;n<LENGTH-STEP;n++) {
+ i=(int)(series[0][n]*epsinv)&nmax;
+ j=(int)(series[comp1][n-hdim]*epsinv)&nmax;
+ list[n]=box[i][j];
+ box[i][j]=n;
+ }
+}
+
+unsigned int hfind_neighbors(unsigned long act)
+{
+ char toolarge;
+ int i,j,i1,i2,j1,k,element;
+ unsigned long nfound=0;
+ unsigned int hcomp,hdel;
+ double max,dx,epsinv;
+
+ epsinv=1.0/epsilon;
+
+ i=(int)(series[0][act]*epsinv)&nmax;
+ j=(int)(series[comp1][act-hdim]*epsinv)&nmax;
+
+ for (i1=i-1;i1<=i+1;i1++) {
+ i2=i1&nmax;
+ for (j1=j-1;j1<=j+1;j1++) {
+ element=box[i2][j1&nmax];
+ while (element != -1) {
+ max=0.0;
+ toolarge=0;
+ for (k=0;k<DIM;k += 1) {
+ hcomp=indexes[0][k];
+ hdel=indexes[1][k];
+ dx=fabs(series[hcomp][element-hdel]-series[hcomp][act-hdel]);
+ max=(dx>max) ? dx : max;
+ if (max > epsilon) {
+ toolarge=1;
+ break;
+ }
+ if (toolarge)
+ break;
+ }
+ if (max <= epsilon)
+ hfound[nfound++]=element;
+ element=list[element];
+ }
+ }
+ }
+ return nfound;
+}
+
+void multiply_matrix(double **mat,double *vec)
+{
+ double *hvec;
+ long i,j;
+
+ check_alloc(hvec=(double*)malloc(sizeof(double)*DIM));
+ for (i=0;i<DIM;i++) {
+ hvec[i]=0.0;
+ for (j=0;j<DIM;j++)
+ hvec[i] += mat[i][j]*vec[j];
+ }
+ for (i=0;i<DIM;i++)
+ vec[i]=hvec[i];
+ free(hvec);
+}
+
+void make_fit(int number,unsigned long act,double *newcast)
+{
+ double *sj,*si,lavi,lavj,fav;
+ unsigned int hci,hdi,hcj,hdj;
+ long i,j,n,which;
+
+ for (i=0;i<DIM;i++)
+ localav[i]=0.0;
+ for (i=0;i<COMP;i++)
+ foreav[i]=0.0;
+
+ for (n=0;n<number;n++) {
+ which=found[n];
+ for (j=0;j<COMP;j++)
+ foreav[j] += series[j][which+STEP];
+ for (j=0;j<DIM;j++) {
+ hcj=indexes[0][j];
+ hdj=indexes[1][j];
+ localav[j] += series[hcj][which-hdj];
+ }
+ }
+
+ for (i=0;i<DIM;i++)
+ localav[i] /= number;
+ for (i=0;i<COMP;i++)
+ foreav[i] /= number;
+
+ for (i=0;i<DIM;i++) {
+ hci=indexes[0][i];
+ hdi=indexes[1][i];
+ lavi=localav[i];
+ si=series[hci];
+ for (j=i;j<DIM;j++) {
+ hcj=indexes[0][j];
+ hdj=indexes[1][j];
+ lavj=localav[j];
+ sj=series[hcj];
+ mat[i][j]=0.0;
+ for (n=0;n<number;n++) {
+ which=found[n];
+ mat[i][j] += (si[which-hdi]-lavi)*(sj[which-hdj]-lavj);
+ }
+ mat[i][j] /= number;
+ mat[j][i] = mat[i][j];
+ }
+ }
+
+ imat=invert_matrix(mat,DIM);
+
+ for (i=0;i<COMP;i++) {
+ si=series[i];
+ fav=foreav[i];
+ for (j=0;j<DIM;j++) {
+ hcj=indexes[0][j];
+ hdj=indexes[1][j];
+ lavj=localav[j];
+ vec[j]=0.0;
+ sj=series[hcj];
+ for (n=0;n<number;n++) {
+ which=found[n];
+ vec[j] += (si[which+STEP]-fav)*(sj[which-hdj]);
+ }
+ vec[j] /= number;
+ }
+
+ multiply_matrix(imat,vec);
+
+ newcast[i]=foreav[i];
+ for (j=0;j<DIM;j++) {
+ hcj=indexes[0][j];
+ hdj=indexes[1][j];
+ newcast[i] += vec[j]*(series[hcj][act-hdj]-localav[j]);
+ }
+ }
+
+
+ for (i=0;i<DIM;i++)
+ free(imat[i]);
+ free(imat);
+}
+
+int main(int argc,char **argv)
+{
+ char stin=0,alldone,*done;
+ long i,j;
+ unsigned long actfound;
+ unsigned long clength;
+ double *rms,*av,*min,*interval,maxinterval,norm;
+ double *error,**individual=NULL;
+ double *newcast;
+ FILE *fout;
+
+ if (scan_help(argc,argv))
+ show_options(argv[0]);
+
+ scan_options(argc,argv);
+
+ if (!causalset)
+ causal=STEP;
+
+#ifndef OMIT_WHAT_I_DO
+ if (verbosity&VER_INPUT)
+ what_i_do(argv[0],WID_STR);
+#endif
+
+ infile=search_datafile(argc,argv,NULL,verbosity);
+ if (infile == NULL)
+ stin=1;
+
+ if (outfile == NULL) {
+ if (!stin) {
+ check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
+ strcpy(outfile,infile);
+ strcat(outfile,".fce");
+ }
+ else {
+ check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
+ strcpy(outfile,"stdin.fce");
+ }
+ }
+ if (!stout)
+ test_outfile(outfile);
+
+ if (COLUMN == NULL)
+ series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,"",dimset,
+ verbosity);
+ else
+ series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,COLUMN,
+ dimset,verbosity);
+
+ if ((LENGTH-(EMBED-1)*DELAY) < MINN) {
+ fprintf(stderr,"Data set is too short to find enough neighbors "
+ "for the fit! Exiting!\n");
+ exit(ONESTEP_TOO_FEW_POINTS);
+ }
+
+ DIM=EMBED*COMP;
+ check_alloc(min=(double*)malloc(sizeof(double)*COMP));
+ check_alloc(interval=(double*)malloc(sizeof(double)*COMP));
+ check_alloc(av=(double*)malloc(sizeof(double)*COMP));
+ check_alloc(rms=(double*)malloc(sizeof(double)*COMP));
+
+ maxinterval=0.0;
+ for (i=0;i<COMP;i++) {
+ rescale_data(series[i],LENGTH,&min[i],&interval[i]);
+ maxinterval=(maxinterval<interval[i])?interval[i]:maxinterval;
+ variance(series[i],LENGTH,&av[i],&rms[i]);
+ }
+
+ if (verbosity&VER_USR1) {
+ check_alloc(individual=(double**)malloc(sizeof(double*)*COMP));
+ for (j=0;j<COMP;j++) {
+ check_alloc(individual[j]=(double*)malloc(sizeof(double)*LENGTH));
+ for (i=0;i<LENGTH;i++)
+ individual[j][i]=0.0;
+ }
+ }
+
+ check_alloc(list=(long*)malloc(sizeof(long)*LENGTH));
+ check_alloc(found=(unsigned long*)malloc(sizeof(long)*LENGTH));
+ check_alloc(hfound=(unsigned long*)malloc(sizeof(long)*LENGTH));
+ check_alloc(done=(char*)malloc(sizeof(char)*LENGTH));
+ check_alloc(box=(long**)malloc(sizeof(long*)*NMAX));
+ for (i=0;i<NMAX;i++)
+ check_alloc(box[i]=(long*)malloc(sizeof(long)*NMAX));
+
+ for (i=0;i<LENGTH;i++)
+ done[i]=0;
+
+ alldone=0;
+ if (epsset)
+ EPS0 /= maxinterval;
+
+ epsilon=EPS0/EPSF;
+ clength=(CLENGTH <= LENGTH) ? CLENGTH-STEP : LENGTH-STEP;
+ comp1=COMP-1;
+ indexes=make_multi_index(COMP,EMBED,DELAY);
+
+ hdim=(EMBED-1)*DELAY;
+ check_alloc(newcast=(double*)malloc(sizeof(double)*COMP));
+
+
+ check_alloc(localav=(double*)malloc(sizeof(double)*DIM));
+ check_alloc(foreav=(double*)malloc(sizeof(double)*COMP));
+ check_alloc(vec=(double*)malloc(sizeof(double)*DIM));
+ check_alloc(mat=(double**)malloc(sizeof(double*)*DIM));
+ for (i=0;i<=DIM;i++)
+ check_alloc(mat[i]=(double*)malloc(sizeof(double)*DIM));
+
+ check_alloc(error=(double*)malloc(sizeof(double)*COMP));
+ for (i=0;i<COMP;i++)
+ error[i]=0.0;
+
+ while (!alldone) {
+ alldone=1;
+ epsilon*=EPSF;
+ put_in_boxes() ;
+ for (i=(EMBED-1)*DELAY;i<clength;i++)
+ if (!done[i]) {
+ actfound=hfind_neighbors(i);
+ actfound=exclude_interval(actfound,i-causal+1,
+ i+causal+(EMBED-1)*DELAY-1,hfound,found);
+ if (actfound > MINN) {
+ make_fit(actfound,i,newcast);
+ for (j=0;j<COMP;j++)
+ error[j] += sqr(newcast[j]-series[j][i+STEP]);
+ if (verbosity&VER_USR1) {
+ for (j=0;j<COMP;j++)
+ individual[j][i]=(newcast[j]-series[j][i+STEP])*interval[j];
+ }
+ done[i]=1;
+ }
+ alldone &= done[i];
+ }
+ }
+ norm=((double)clength-(double)((EMBED-1)*DELAY));
+ if (stout) {
+ if (verbosity&VER_USR1) {
+ fprintf(stdout,"#Relative forecast errors for each component:\n");
+ for (i=0;i<COMP;i++)
+ fprintf(stdout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
+
+ for (i=(EMBED-1)*DELAY;i<clength;i++) {
+ for (j=0;j<COMP-1;j++)
+ fprintf(stdout,"%e ",individual[j][i]);
+ fprintf(stdout,"%e\n",individual[COMP-1][i]);
+ }
+ }
+ else {
+ fprintf(stdout,"#Relative forecast errors for each component:\n");
+ for (i=0;i<COMP;i++)
+ fprintf(stdout,"%e\n",sqrt(error[i]/norm)/rms[i]);
+ }
+ }
+ else {
+ fout=fopen(outfile,"w");
+ if (verbosity&VER_INPUT)
+ fprintf(stderr,"Opened %s for writing\n",outfile);
+ if (verbosity&VER_USR1) {
+ fprintf(fout,"#Relative forecast errors for each component:\n");
+ for (i=0;i<COMP;i++)
+ fprintf(fout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
+
+ for (i=(EMBED-1)*DELAY;i<clength;i++) {
+ for (j=0;j<COMP-1;j++)
+ fprintf(fout,"%e ",individual[j][i]);
+ fprintf(fout,"%e\n",individual[COMP-1][i]);
+ }
+ }
+ else {
+ fprintf(fout,"#Relative forecast errors for each component:\n");
+ for (i=0;i<COMP;i++)
+ fprintf(fout,"%e\n",sqrt(error[i]/norm)/rms[i]);
+ }
+ fclose(fout);
+ free(outfile);
+ }
+
+ return 0;
+}