Adding DisEMBL dependency Tisean executable
[jabaws.git] / binaries / src / disembl / Tisean_3.0.1 / source_c / lfo-test.c
diff --git a/binaries/src/disembl/Tisean_3.0.1/source_c/lfo-test.c b/binaries/src/disembl/Tisean_3.0.1/source_c/lfo-test.c
new file mode 100644 (file)
index 0000000..7c90879
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ *   This file is part of TISEAN
+ *
+ *   Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
+ *
+ *   TISEAN is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   TISEAN is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with TISEAN; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+/*Author: Rainer Hegger */
+/*Changes:
+  Sep 8, 2006: Add -o functionality
+  Sep 7, 2006: Completely rewritten to handle multivariate data
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "routines/tsa.h"
+#include <math.h>
+
+#define WID_STR "Estimates the average forecast error of a local\n\t\
+linear fit"
+
+
+/*number of boxes for the neighbor search algorithm*/
+#define NMAX 512
+
+unsigned int nmax=(NMAX-1),comp1,hdim,**indexes;
+long **box,*list;
+unsigned long *found,*hfound;
+double **series;
+double epsilon;
+double **mat,**imat,*vec,*localav,*foreav;
+
+char epsset=0,causalset=0;
+unsigned int verbosity=VER_INPUT|VER_FIRST_LINE;
+unsigned int COMP=1,EMBED=2,DIM,DELAY=1,MINN=30,STEP=1;
+double EPS0=1.e-3,EPSF=1.2;
+unsigned long LENGTH=ULONG_MAX,exclude=0,CLENGTH=ULONG_MAX,causal;
+char *infile=NULL,*COLUMN=NULL,*outfile=NULL;
+char dimset=0,stout=1;
+
+/*
+ * Print the usage text to stderr and terminate the process with status 0.
+ *
+ * progname: name shown in the "Usage:" line and passed to what_i_do().
+ */
+void show_options(char *progname)
+{
+  /* Option lines printed after the -m entry; none of them takes format
+     arguments, so they are written verbatim. */
+  static const char *const after_m[] = {
+    "\t-d delay [default: 1]\n",
+    "\t-n iterations [default: length]\n",
+    "\t-k minimal number of neighbors for the fit [default: 30]\n",
+    "\t-r neighborhoud size to start with [default: (data interval)/1000]\n",
+    "\t-f factor to increase size [default: 1.2]\n",
+    "\t-s steps to forecast [default: 1]\n",
+    "\t-C width of causality window [default: steps]\n",
+    "\t-o output file [default 'datafile'.fce no -o means write to stdout]\n",
+    "\t-V verbosity level [default: 1]\n\t\t"
+    "0='only panic messages'\n\t\t"
+    "1='+ input/output messages'\n\t\t"
+    "2='+ print indiviual forecast errors'\n",
+    "\t-h show these options\n"
+  };
+  size_t line;
+
+  what_i_do(progname,WID_STR);
+  fprintf(stderr," Usage: %s [options]\n",progname);
+
+  fputs(" Options:\n",stderr);
+  fputs("Everything not being a valid option will be interpreted as a "
+        "possible datafile.\n"
+        "If no datafile is given stdin is read. Just - also means stdin\n",
+        stderr);
+  fputs("\t-l # of data to use [default: whole file]\n",stderr);
+  fputs("\t-x # of lines to be ignored [default: 0]\n",stderr);
+  fputs("\t-c columns to read [default: 1]\n",stderr);
+
+  /* The only entry that reports the current values of globals. */
+  fprintf(stderr,
+          "\t-m # of components, embedding dimension [default: %u,%u]\n",
+          COMP,EMBED);
+
+  for (line=0;line<sizeof(after_m)/sizeof(after_m[0]);line++)
+    fputs(after_m[line],stderr);
+
+  exit(0);
+}
+
+/*
+ * Read the command line options into the file-scope settings.
+ *
+ * n:  number of entries in `in` (argc).
+ * in: argument vector (argv).
+ *
+ * Each option is looked up with check_option(); a non-NULL result is parsed
+ * with sscanf() into the matching global. -c, -r, -C and -o additionally
+ * raise or clear a flag so that main() can tell they were given.
+ */
+void scan_options(int n,char **in)
+{
+  char *arg;
+
+  arg=check_option(in,n,'l','u');
+  if (arg)
+    sscanf(arg,"%lu",&LENGTH);
+
+  arg=check_option(in,n,'x','u');
+  if (arg)
+    sscanf(arg,"%lu",&exclude);
+
+  arg=check_option(in,n,'c','s');
+  if (arg) {
+    COLUMN=arg;
+    dimset=1;
+  }
+
+  arg=check_option(in,n,'m','2');
+  if (arg)
+    sscanf(arg,"%u,%u",&COMP,&EMBED);
+
+  arg=check_option(in,n,'d','u');
+  if (arg)
+    sscanf(arg,"%u",&DELAY);
+
+  arg=check_option(in,n,'n','u');
+  if (arg)
+    sscanf(arg,"%lu",&CLENGTH);
+
+  arg=check_option(in,n,'V','u');
+  if (arg)
+    sscanf(arg,"%u",&verbosity);
+
+  arg=check_option(in,n,'k','u');
+  if (arg)
+    sscanf(arg,"%u",&MINN);
+
+  arg=check_option(in,n,'r','f');
+  if (arg) {
+    epsset=1;
+    sscanf(arg,"%lf",&EPS0);
+  }
+
+  arg=check_option(in,n,'f','f');
+  if (arg)
+    sscanf(arg,"%lf",&EPSF);
+
+  arg=check_option(in,n,'s','u');
+  if (arg)
+    sscanf(arg,"%u",&STEP);
+
+  arg=check_option(in,n,'C','u');
+  if (arg) {
+    sscanf(arg,"%lu",&causal);
+    causalset=1;
+  }
+
+  arg=check_option(in,n,'o','o');
+  if (arg) {
+    /* -o alone switches to file output; the name is only taken over when
+       one was actually supplied. */
+    stout=0;
+    if (strlen(arg) > 0)
+      outfile=arg;
+  }
+}
+
+/*
+ * Rebuild the box grid for the current value of epsilon.
+ *
+ * All NMAX x NMAX cells are reset to -1, then every index n with
+ * hdim <= n < LENGTH-STEP is pushed onto the chain of the box addressed by
+ * series[0][n] and series[comp1][n-hdim], both scaled by 1/epsilon and
+ * folded with nmax. box[][] keeps the head of each chain and list[] the
+ * link to the previously inserted index.
+ */
+void put_in_boxes(void)
+{
+  int row,col,idx;
+  double scale;
+
+  scale=1.0/epsilon;
+
+  for (row=0;row<NMAX;row++) {
+    long *cells=box[row];
+    for (col=0;col<NMAX;col++)
+      cells[col]= -1;
+  }
+
+  idx=hdim;
+  while (idx<LENGTH-STEP) {
+    row=(int)(series[0][idx]*scale)&nmax;
+    col=(int)(series[comp1][idx-hdim]*scale)&nmax;
+    /* push idx in front of whatever the box held before */
+    list[idx]=box[row][col];
+    box[row][col]=idx;
+    idx++;
+  }
+}
+
+/*
+ * Collect all boxed points whose embedding vector lies within epsilon of
+ * the one at index `act`, measured in the maximum norm over the DIM
+ * coordinates described by indexes[0][] (component) and indexes[1][]
+ * (delay offset).
+ *
+ * The box of `act` and its eight neighbors (coordinates wrapped with nmax)
+ * are scanned by following the chains built by put_in_boxes().
+ *
+ * act: index of the reference point.
+ *
+ * Returns the number of indices written to hfound[].
+ */
+unsigned int hfind_neighbors(unsigned long act)
+{
+  int i,j,i1,i2,j1,k,element;
+  unsigned long nfound=0;
+  unsigned int hcomp,hdel;
+  double max,dx,epsinv;
+
+  epsinv=1.0/epsilon;
+
+  i=(int)(series[0][act]*epsinv)&nmax;
+  j=(int)(series[comp1][act-hdim]*epsinv)&nmax;
+
+  for (i1=i-1;i1<=i+1;i1++) {
+    i2=i1&nmax;
+    for (j1=j-1;j1<=j+1;j1++) {
+      element=box[i2][j1&nmax];
+      while (element != -1) {
+        max=0.0;
+        for (k=0;k<DIM;k++) {
+          hcomp=indexes[0][k];
+          hdel=indexes[1][k];
+          dx=fabs(series[hcomp][element-hdel]-series[hcomp][act-hdel]);
+          if (dx > max)
+            max=dx;
+          /* one coordinate outside epsilon already rules the point out */
+          if (max > epsilon)
+            break;
+        }
+        if (max <= epsilon)
+          hfound[nfound++]=element;
+        element=list[element];
+      }
+    }
+  }
+  return nfound;
+}
+
+/*
+ * Replace vec by the product mat*vec for a DIM x DIM matrix.
+ *
+ * mat: matrix addressed as mat[row][column], DIM rows of DIM entries.
+ * vec: DIM entries; input on entry, result on return.
+ *
+ * The product is built in a temporary buffer so that vec can be read while
+ * the result is being formed.
+ */
+void multiply_matrix(double **mat,double *vec)
+{
+  double *product;
+  long row,col;
+
+  check_alloc(product=(double*)malloc(sizeof(double)*DIM));
+
+  for (row=0;row<DIM;row++) {
+    const double *coeff=mat[row];
+
+    product[row]=0.0;
+    for (col=0;col<DIM;col++)
+      product[row] += coeff[col]*vec[col];
+  }
+
+  memcpy(vec,product,sizeof(double)*DIM);
+  free(product);
+}
+
+/*
+ * Fit a local linear model to the neighbors listed in found[] and evaluate
+ * it at the point `act`.
+ *
+ * number:  how many entries of found[] to use.
+ * act:     index of the point the forecast is made for.
+ * newcast: receives one forecast value per component (COMP entries).
+ *
+ * Steps:
+ *   1. localav[] = mean of every embedding coordinate over the neighbors,
+ *      foreav[]  = mean of every component STEP samples after the neighbors.
+ *   2. mat       = covariance matrix of the embedding coordinates.
+ *   3. imat      = invert_matrix(mat,DIM).
+ *   4. for each component: vec = covariance between the future value and
+ *      the embedding coordinates, vec = imat*vec, and
+ *      newcast = foreav + vec . (x(act) - localav).
+ *
+ * Uses the globals localav, foreav, mat, imat and vec as scratch space; the
+ * rows of imat and imat itself are freed before returning.
+ */
+void make_fit(int number,unsigned long act,double *newcast)
+{
+  double *sj,*si,lavi,lavj,fav;
+  unsigned int hci,hdi,hcj,hdj;
+  long i,j,n,which;
+
+  for (i=0;i<DIM;i++)
+    localav[i]=0.0;
+  for (i=0;i<COMP;i++)
+    foreav[i]=0.0;
+
+  /* accumulate the sums for both sets of averages in one pass */
+  for (n=0;n<number;n++) {
+    which=found[n];
+    for (j=0;j<COMP;j++)
+      foreav[j] += series[j][which+STEP];
+    for (j=0;j<DIM;j++) {
+      hcj=indexes[0][j];
+      hdj=indexes[1][j];
+      localav[j] += series[hcj][which-hdj];
+    }
+  }
+
+  for (i=0;i<DIM;i++)
+    localav[i] /= number;
+  for (i=0;i<COMP;i++)
+    foreav[i] /= number;
+
+  /* covariance matrix; only the upper triangle is computed, the lower one
+     is mirrored */
+  for (i=0;i<DIM;i++) {
+    hci=indexes[0][i];
+    hdi=indexes[1][i];
+    lavi=localav[i];
+    si=series[hci];
+    for (j=i;j<DIM;j++) {
+      hcj=indexes[0][j];
+      hdj=indexes[1][j];
+      lavj=localav[j];
+      sj=series[hcj];
+      mat[i][j]=0.0;
+      for (n=0;n<number;n++) {
+        which=found[n];
+        mat[i][j] += (si[which-hdi]-lavi)*(sj[which-hdj]-lavj);
+      }
+      mat[i][j] /= number;
+      mat[j][i] = mat[i][j];
+    }
+  }
+
+  imat=invert_matrix(mat,DIM);
+
+  for (i=0;i<COMP;i++) {
+    si=series[i];
+    fav=foreav[i];
+    for (j=0;j<DIM;j++) {
+      hcj=indexes[0][j];
+      hdj=indexes[1][j];
+      lavj=localav[j];
+      vec[j]=0.0;
+      sj=series[hcj];
+      for (n=0;n<number;n++) {
+        which=found[n];
+        /* Both factors are centered, matching the covariance matrix above.
+           The lavj term cancels in exact arithmetic because the deviations
+           from fav sum to zero; keeping it makes the formula consistent. */
+        vec[j] += (si[which+STEP]-fav)*(sj[which-hdj]-lavj);
+      }
+      vec[j] /= number;
+    }
+
+    multiply_matrix(imat,vec);
+
+    newcast[i]=foreav[i];
+    for (j=0;j<DIM;j++) {
+      hcj=indexes[0][j];
+      hdj=indexes[1][j];
+      newcast[i] += vec[j]*(series[hcj][act-hdj]-localav[j]);
+    }
+  }
+
+  /* imat comes from invert_matrix() for every call and is released here */
+  for (i=0;i<DIM;i++)
+    free(imat[i]);
+  free(imat);
+}
+
+/*
+ * Program entry point.
+ *
+ * Reads the options and the (possibly multivariate) data, then makes a
+ * local linear forecast STEP samples ahead for every index i with
+ * (EMBED-1)*DELAY <= i < clength. The neighborhood size starts at EPS0 and
+ * is multiplied by EPSF before every further pass; a point is finished as
+ * soon as more than MINN neighbors remain after exclude_interval() has
+ * been applied with the causality window. The loop ends when every point
+ * is finished.
+ *
+ * Output (stdout, or the file chosen through -o): a header line followed
+ * by sqrt(error/norm)/rms for every component. When VER_USR1 is set in the
+ * verbosity these lines are prefixed with "# " and followed by the
+ * individual forecast errors, multiplied by the interval[] values that
+ * rescale_data() reported.
+ *
+ * Returns 0 on success. Exits with ONESTEP_TOO_FEW_POINTS when the data
+ * set is shorter than MINN usable points, and with EXIT_FAILURE when the
+ * output file cannot be opened.
+ */
+int main(int argc,char **argv)
+{
+  char stin=0,alldone,*done;
+  long i,j;
+  unsigned long actfound;
+  unsigned long clength;
+  double *rms,*av,*min,*interval,maxinterval,norm;
+  double *error,**individual=NULL;
+  double *newcast;
+  FILE *fout;
+
+  if (scan_help(argc,argv))
+    show_options(argv[0]);
+
+  scan_options(argc,argv);
+
+  if (!causalset)
+    causal=STEP;
+
+#ifndef OMIT_WHAT_I_DO
+  if (verbosity&VER_INPUT)
+    what_i_do(argv[0],WID_STR);
+#endif
+
+  infile=search_datafile(argc,argv,NULL,verbosity);
+  if (infile == NULL)
+    stin=1;
+
+  /* default output name: <datafile>.fce, or stdin.fce when reading stdin */
+  if (outfile == NULL) {
+    if (!stin) {
+      check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
+      strcpy(outfile,infile);
+      strcat(outfile,".fce");
+    }
+    else {
+      check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
+      strcpy(outfile,"stdin.fce");
+    }
+  }
+  if (!stout)
+    test_outfile(outfile);
+
+  if (COLUMN == NULL)
+    series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,"",dimset,
+                                      verbosity);
+  else
+    series=(double**)get_multi_series(infile,&LENGTH,exclude,&COMP,COLUMN,
+                                      dimset,verbosity);
+
+  if ((LENGTH-(EMBED-1)*DELAY) < MINN) {
+    fprintf(stderr,"Data set is too short to find enough neighbors "
+           "for the fit! Exiting!\n");
+    exit(ONESTEP_TOO_FEW_POINTS);
+  }
+
+  DIM=EMBED*COMP;
+  check_alloc(min=(double*)malloc(sizeof(double)*COMP));
+  check_alloc(interval=(double*)malloc(sizeof(double)*COMP));
+  check_alloc(av=(double*)malloc(sizeof(double)*COMP));
+  check_alloc(rms=(double*)malloc(sizeof(double)*COMP));
+
+  maxinterval=0.0;
+  for (i=0;i<COMP;i++) {
+    rescale_data(series[i],LENGTH,&min[i],&interval[i]);
+    maxinterval=(maxinterval<interval[i])?interval[i]:maxinterval;
+    variance(series[i],LENGTH,&av[i],&rms[i]);
+  }
+
+  if (verbosity&VER_USR1) {
+    check_alloc(individual=(double**)malloc(sizeof(double*)*COMP));
+    for (j=0;j<COMP;j++) {
+      check_alloc(individual[j]=(double*)malloc(sizeof(double)*LENGTH));
+      for (i=0;i<LENGTH;i++)
+        individual[j][i]=0.0;
+    }
+  }
+
+  check_alloc(list=(long*)malloc(sizeof(long)*LENGTH));
+  check_alloc(found=(unsigned long*)malloc(sizeof(unsigned long)*LENGTH));
+  check_alloc(hfound=(unsigned long*)malloc(sizeof(unsigned long)*LENGTH));
+  check_alloc(done=(char*)malloc(sizeof(char)*LENGTH));
+  check_alloc(box=(long**)malloc(sizeof(long*)*NMAX));
+  for (i=0;i<NMAX;i++)
+    check_alloc(box[i]=(long*)malloc(sizeof(long)*NMAX));
+
+  for (i=0;i<LENGTH;i++)
+    done[i]=0;
+
+  alldone=0;
+  /* a user supplied -r value is divided by the largest data interval */
+  if (epsset)
+    EPS0 /= maxinterval;
+
+  /* divided once here because the loop multiplies before its first pass */
+  epsilon=EPS0/EPSF;
+  clength=(CLENGTH <= LENGTH) ? CLENGTH-STEP : LENGTH-STEP;
+  comp1=COMP-1;
+  indexes=make_multi_index(COMP,EMBED,DELAY);
+
+  hdim=(EMBED-1)*DELAY;
+  check_alloc(newcast=(double*)malloc(sizeof(double)*COMP));
+
+  check_alloc(localav=(double*)malloc(sizeof(double)*DIM));
+  check_alloc(foreav=(double*)malloc(sizeof(double)*COMP));
+  check_alloc(vec=(double*)malloc(sizeof(double)*DIM));
+  check_alloc(mat=(double**)malloc(sizeof(double*)*DIM));
+  /* mat holds exactly DIM row pointers, so the last valid row is DIM-1 */
+  for (i=0;i<DIM;i++)
+    check_alloc(mat[i]=(double*)malloc(sizeof(double)*DIM));
+
+  check_alloc(error=(double*)malloc(sizeof(double)*COMP));
+  for (i=0;i<COMP;i++)
+    error[i]=0.0;
+
+  while (!alldone) {
+    alldone=1;
+    epsilon*=EPSF;
+    put_in_boxes();
+    for (i=hdim;i<clength;i++)
+      if (!done[i]) {
+        actfound=hfind_neighbors(i);
+        actfound=exclude_interval(actfound,i-causal+1,
+                                  i+causal+(EMBED-1)*DELAY-1,hfound,found);
+        if (actfound > MINN) {
+          make_fit(actfound,i,newcast);
+          for (j=0;j<COMP;j++)
+            error[j] += sqr(newcast[j]-series[j][i+STEP]);
+          if (verbosity&VER_USR1) {
+            for (j=0;j<COMP;j++)
+              individual[j][i]=(newcast[j]-series[j][i+STEP])*interval[j];
+          }
+          done[i]=1;
+        }
+        alldone &= done[i];
+      }
+  }
+  norm=((double)clength-(double)((EMBED-1)*DELAY));
+
+  /* both destinations receive identical text, so only the stream differs */
+  if (stout)
+    fout=stdout;
+  else {
+    fout=fopen(outfile,"w");
+    if (fout == NULL) {
+      fprintf(stderr,"Could not open %s for writing\n",outfile);
+      exit(EXIT_FAILURE);
+    }
+    if (verbosity&VER_INPUT)
+      fprintf(stderr,"Opened %s for writing\n",outfile);
+  }
+
+  fprintf(fout,"#Relative forecast errors for each component:\n");
+  if (verbosity&VER_USR1) {
+    for (i=0;i<COMP;i++)
+      fprintf(fout,"# %e\n",sqrt(error[i]/norm)/rms[i]);
+
+    for (i=hdim;i<clength;i++) {
+      for (j=0;j<COMP-1;j++)
+        fprintf(fout,"%e ",individual[j][i]);
+      fprintf(fout,"%e\n",individual[COMP-1][i]);
+    }
+  }
+  else {
+    for (i=0;i<COMP;i++)
+      fprintf(fout,"%e\n",sqrt(error[i]/norm)/rms[i]);
+  }
+
+  if (!stout) {
+    fclose(fout);
+    free(outfile);
+  }
+
+  return 0;
+}