2 * This file is part of TISEAN
4 * Copyright (c) 1998-2007 Rainer Hegger, Holger Kantz, Thomas Schreiber
6 * TISEAN is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * TISEAN is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with TISEAN; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /*Author: Rainer Hegger, Last modified: Feb 11, 2006 */
22 Feb 4, 2006: First version
23 Feb 6, 2006: Find and remove bugs (1)
24 Feb 11, 2006: Add rand_arb_dist to iterate_***_model
31 #include "routines/tsa.h"
/* One-line program description; handed to what_i_do() by show_options() and
   main(). */
33 #define WID_STR "Fits an multivariate ARIMA model to the data and gives\
34 the coefficients\n\tand the residues (or an iterated model)"
/* length: number of data points to use (-l); the ULONG_MAX default stands for
   "whole file". exclude: number of leading lines to ignore (-x). */
36 unsigned long length=ULONG_MAX,exclude=0;
/* dim: number of components (-m). poles: order of the initial AR fit (-p).
   ilength: length of the iterated model (-s). ITER: number of ARIMA
   iterations (-I). */
37 unsigned int dim=1,poles=10,ilength,ITER=50;
/* arpoles, ipoles, mapoles: orders of the AR, I and MA parts (-P).
   offset: additional number of leading samples skipped by the sums in
   build_matrix() and build_vector(); its assignment is not among the lines
   shown here. */
38 unsigned int arpoles=0,ipoles=0,mapoles=0,offset;
/* Bit mask selecting which messages and output columns are produced (-V). */
39 unsigned int verbosity=1;
/* outfile: output file name (-o). column: column specification forwarded to
   get_multi_series() (-c). dimset is forwarded to get_multi_series();
   run_model gates the model iteration in main().
   NOTE(review): stdo and arimaset are only assigned/tested on lines that are
   not shown here - presumably "write to stdout" and "-P was given"; confirm. */
40 char *outfile=NULL,*column=NULL,stdo=1,dimset=0,run_model=0,arimaset=0;
/* series: the data, indexed as series[component][time].
   convergence: threshold the residual change is compared against (-e). */
42 double **series,convergence=1.0e-3;
/* ardim: number of regressors of the pure AR fit (size of the AR index table
   and of the AR matrix). armadim: (arpoles+mapoles)*dim, set in main(). */
45 unsigned long ardim,armadim;
/* Two-row index table produced by make_ar_index() or make_arima_index(). */
46 unsigned int **aindex;
/* Print the program description, the usage line and the list of command line
   options to stderr. The defaults printed with %u / %lf are the current values
   of the corresponding globals (poles, arpoles, ipoles, mapoles, ITER,
   convergence).
   progname: program name inserted into the usage line. */
48 void show_options(char *progname)
50 what_i_do(progname,WID_STR);
51 fprintf(stderr," Usage: %s [options]\n",progname);
52 fprintf(stderr," Options:\n");
53 fprintf(stderr,"Everything not being a valid option will be interpreted"
55 " datafile.\nIf no datafile is given stdin is read. Just - also"
57 fprintf(stderr,"\t-l length of file [default is whole file]\n");
58 fprintf(stderr,"\t-x # of lines to be ignored [default is 0]\n");
59 fprintf(stderr,"\t-m dimension [default is 1]\n");
60 fprintf(stderr,"\t-c columns to read [default is 1,...,dimension]\n");
61 fprintf(stderr,"\t-p order of initial AR-Fit [default is %u]\n",poles);
62 fprintf(stderr,"\t-P order of AR,I,MA-Fit [default is %u,%u,%u]\n",
63 arpoles,ipoles,mapoles);
64 fprintf(stderr,"\t-I # of arima iterations [default is %u]\n",ITER);
65 fprintf(stderr,"\t-e accuracy of convergence [default is %lf]\n",convergence);
66 fprintf(stderr,"\t-s length of iterated model [default no iteration]\n");
67 fprintf(stderr,"\t-o output file name [default is 'datafile'.ari]\n");
/* The verbosity values listed below are bit flags; main() tests them with
   verbosity&VER_INPUT, verbosity&VER_USR1 and verbosity&VER_USR2. */
68 fprintf(stderr,"\t-V verbosity level [default is 1]\n\t\t"
69 "0='only panic messages'\n\t\t"
70 "1='+ input/output messages'\n\t\t"
71 "2='+ print residuals though iterating a model'\n\t\t"
72 "4='+ print original data plus residuals'\n");
73 fprintf(stderr,"\t-h show these options\n\n");
/* Read the command line options into the corresponding globals. Each option is
   looked up with check_option(); when it is present, its argument string is
   parsed with sscanf().
   NOTE(review): none of the sscanf() return values are checked. Unless
   check_option() already validates the argument text, a malformed value is
   silently ignored - confirm.
   NOTE(review): several guarding conditions and flag assignments of this
   function are not present in this listing; the comments cover only the
   visible lines. */
77 void scan_options(int argc,char **argv)
/* -p: order of the initial AR fit. */
81 if ((out=check_option(argv,argc,'p','u')) != NULL) {
82 sscanf(out,"%u",&poles);
/* The test that triggers this message is not shown; presumably poles < 1. */
84 fprintf(stderr,"The order should at least be one!\n");
/* -l: number of data points to use. */
88 if ((out=check_option(argv,argc,'l','u')) != NULL)
89 sscanf(out,"%lu",&length);
/* -x: number of leading lines to ignore. */
90 if ((out=check_option(argv,argc,'x','u')) != NULL)
91 sscanf(out,"%lu",&exclude);
/* -m: number of components. */
92 if ((out=check_option(argv,argc,'m','u')) != NULL) {
93 sscanf(out,"%u",&dim);
/* -P: three comma separated orders for the AR, I and MA parts. The '3' type
   code presumably tells check_option() to expect three values - confirm. */
96 if ((out=check_option(argv,argc,'P','3')) != NULL) {
97 sscanf(out,"%u,%u,%u",&arpoles,&ipoles,&mapoles);
/* The statement controlled by this test is not shown; presumably it records
   that an ARIMA fit was requested. */
98 if ((arpoles+ipoles+mapoles)>0)
/* -I: number of ARIMA iterations. */
101 if ((out=check_option(argv,argc,'I','u')) != NULL)
102 sscanf(out,"%u",&ITER);
/* -e: convergence threshold. */
103 if ((out=check_option(argv,argc,'e','f')) != NULL)
104 sscanf(out,"%lf",&convergence);
/* -c: column specification; the statement storing it is not shown. */
105 if ((out=check_option(argv,argc,'c','u')) != NULL)
/* -V: verbosity bit mask. */
107 if ((out=check_option(argv,argc,'V','u')) != NULL)
108 sscanf(out,"%u",&verbosity);
/* -s: length of the iterated model. */
109 if ((out=check_option(argv,argc,'s','u')) != NULL) {
110 sscanf(out,"%u",&ilength);
/* -o: output file; the body handling the (optional) name is not shown. */
113 if ((out=check_option(argv,argc,'o','o')) != NULL) {
/* Difference the global series in place: every sample with index i >= 1 is
   replaced by its difference to the preceding sample. Sample 0 is left
   untouched because the loop stops at i > 0.
   NOTE(review): the loop over the component index d is not present in this
   listing. */
120 void make_difference(void)
/* Walk backwards so that series[d][i-1] still holds the undifferenced value
   when it is read. */
124 for (i=length-1;i>0;i--)
126 series[d][i]=series[d][i]-series[d][i-1];
/* Build the two-row index table for the pure AR fit. Both rows hold ardim
   entries; for regressor number i, row 0 stores i/poles and row 1 stores
   i%poles. Presumably these are the component number and the delay of the
   regressor (main() prints them as x_<id+1>(n-<is>)) - confirm against the
   lines of build_matrix() that read the table.
   Returns the newly allocated table (assigned to aindex in main()); no
   release of the table is visible in this listing. */
129 unsigned int** make_ar_index(void)
131 unsigned int** ar_index;
134 check_alloc(ar_index=(unsigned int**)malloc(sizeof(unsigned int*)*2));
136 check_alloc(ar_index[i]=(unsigned int*)
137 malloc(sizeof(unsigned int)*ardim));
138 for (i=0;i<ardim;i++) {
139 ar_index[0][i]=i/poles;
140 ar_index[1][i]=i%poles;
/* Build the two-row index table for the ARMA fit.
   ars: number of AR delays per component.
   mas: number of MA delays per component.
   The first ars*dim entries describe the AR regressors (row 0 = i/ars,
   row 1 = i%ars). The following mas*dim entries, stored from position i0 on,
   describe the MA regressors; their row 0 value is shifted by dim, so values
   >= dim in row 0 mark MA terms.
   NOTE(review): the definitions of armad and i0 are not present in this
   listing; presumably armad=(ars+mas)*dim and i0=ars*dim - confirm. */
145 unsigned int** make_arima_index(unsigned int ars,unsigned int mas)
147 unsigned int** arima_index;
152 check_alloc(arima_index=(unsigned int**)malloc(sizeof(unsigned int*)*2));
154 check_alloc(arima_index[i]=(unsigned int*)
155 malloc(sizeof(unsigned int)*armad));
/* AR part. The loop body is never entered when ars is 0, so the division by
   ars cannot be reached with a zero divisor. */
156 for (i=0;i<ars*dim;i++) {
157 arima_index[0][i]=i/ars;
158 arima_index[1][i]=i%ars;
/* MA part, appended behind the AR entries. */
161 for (i=0;i<mas*dim;i++) {
162 arima_index[0][i+i0]=dim+i/mas;
163 arima_index[1][i+i0]=i%mas;
/* Subtract from every component of the global series the value that
   variance() stores in my_average[i] (presumably the mean of that component -
   confirm against variance()). The stored values are kept: main() adds them
   back when it prints the original data. */
169 void set_averages_to_zero(void)
174 for (i=0;i<dim;i++) {
/* var receives the second result of variance() and is not used on the
   visible lines. */
175 variance(series[i],length,&my_average[i],&var);
176 for (j=0;j<length;j++)
177 series[i][j] -= my_average[i];
/* Fill the size x size matrix of regressor products and return its inverse.
   mat:  caller-allocated size x size matrix that receives the sums.
   size: number of regressors.
   Only entries with j >= i are accumulated on the visible lines; each is the
   sum over n of series[id][n-is]*series[jd][n-js].
   Returns whatever invert_matrix(mat,size) returns.
   NOTE(review): the lines that set is/id/js/jd (presumably read from aindex),
   apply norm and fill the remaining entries are not present in this listing. */
181 double** build_matrix(double **mat,unsigned int size)
183 long n,i,j,is,id,js,jd;
/* Reciprocal of the number of terms minus one... the sums below run over
   length-poles-offset values of n. */
186 norm=1./((double)length-1.0-(double)poles-(double)offset);
188 for (i=0;i<size;i++) {
191 for (j=i;j<size;j++) {
/* n stops at length-2 because the fit predicts sample n+1 (see build_vector). */
195 for (n=offset+poles-1;n<length-1;n++)
196 mat[i][j] += series[id][n-is]*series[jd][n-js];
202 return invert_matrix(mat,size);
/* Fill vec with the products between the next sample of component comp and
   each regressor: vec[i] accumulates series[comp][n+1]*series[id][n-is] over
   the same range of n that build_matrix() uses.
   vec:  caller-allocated array of size entries.
   size: number of regressors.
   comp: index of the component being predicted.
   NOTE(review): the lines that reset vec[i], set is/id and apply norm are not
   present in this listing. */
205 void build_vector(double *vec,unsigned int size,long comp)
210 norm=1./((double)length-1.0-(double)poles-(double)offset);
212 for (i=0;i<size;i++) {
216 for (n=offset+poles-1;n<length-1;n++)
217 vec[i] += series[comp][n+1]*series[id][n-is];
/* Compute the matrix-vector product mat*vec into a newly allocated array of
   size doubles.
   mat:  matrix, read as mat[i][j].
   vec:  vector with size entries.
   size: dimension of both.
   main() stores the result as a coefficient row, so the array is presumably
   returned to and owned by the caller (the return statement is not shown).
   NOTE(review): the initialisation of new_vec[i] and the loop over j are not
   present in this listing. */
222 double* multiply_matrix_vector(double **mat,double *vec,unsigned int size)
227 check_alloc(new_vec=(double*)malloc(sizeof(double)*size));
229 for (i=0;i<size;i++) {
232 new_vec[i] += mat[i][j]*vec[j];
/* Compute the one-step prediction errors of the fitted model.
   diff:  receives, per component d, series[d][n1] minus the model prediction
          sum_i coeff[d][i]*series[id][n-is].
   coeff: coefficient rows, one per component.
   size:  number of regressors per row.
   A newly allocated array of dim doubles collects the squared errors per
   component and is finally turned into a root mean square using
   length-poles as the divisor. main() assigns the result to pm, so the array
   is presumably returned (the return statement is not shown).
   NOTE(review): n1 is presumably n+1, and is/id presumably come from aindex;
   the lines defining them are not present in this listing. */
238 double* make_residuals(double **diff,double **coeff,unsigned int size)
243 check_alloc(resi=(double*)malloc(sizeof(double)*dim));
247 for (n=poles-1;n<length-1;n++) {
249 for (d=0;d<dim;d++) {
/* Start from the observed value, then subtract each regressor contribution. */
250 diff[d][n1]=series[d][n1];
251 for (i=0;i<size;i++) {
254 diff[d][n1] -= coeff[d][i]*series[id][n-is];
256 resi[d] += sqr(diff[d][n1]);
261 resi[i]=sqrt(resi[i]/((double)length-(double)poles));
/* Generate ilength points of a trajectory from the fitted pure AR model and
   print them.
   coeff: AR coefficients, indexed as coeff[component][i1*poles+i2].
   sigma: not used on the visible lines.
   diff:  residuals of the fit; passed to rand_arb_dist() to obtain the
          driving noise.
   file:  output stream for the fprintf() branch. main() passes NULL in its
          stdout section, so presumably NULL selects the printf() branch -
          the test itself is not shown.
   iterate holds poles+1 state vectors of dim doubles: entries 0..poles-1 are
   the history, entry poles is the point being computed. */
266 void iterate_model(double **coeff,double *sigma,double **diff,FILE *file)
269 double **iterate,*swap,**myrand;
271 check_alloc(iterate=(double**)malloc(sizeof(double*)*(poles+1)));
272 for (i=0;i<=poles;i++)
273 check_alloc(iterate[i]=(double*)malloc(sizeof(double)*dim));
275 check_alloc(myrand=(double**)malloc(sizeof(double*)*dim));
/* One noise sequence of ilength+poles values per component. The trailing
   arguments 100 and 0x44325 are presumably a bin count and a seed - confirm
   against rand_arb_dist(). */
277 myrand[i]=rand_arb_dist(diff[i],length,ilength+poles,100,0x44325);
/* Seed the history with the first poles noise values. */
283 for (j=0;j<poles;j++)
284 iterate[j][i]=myrand[i][j];
286 for (n=0;n<ilength;n++) {
287 for (d=0;d<dim;d++) {
/* New point = noise term + linear combination of the stored history. */
288 iterate[poles][d]=myrand[d][n+poles];
289 for (i1=0;i1<dim;i1++)
290 for (i2=0;i2<poles;i2++)
291 iterate[poles][d] += coeff[d][i1*poles+i2]*iterate[poles-1-i2][i1];
295 fprintf(file,"%e ",iterate[poles][d]);
300 printf("%e ",iterate[poles][d]);
/* Shift the history by moving row pointers instead of copying data.
   NOTE(review): the statements that save iterate[0] in swap and store it back
   into iterate[poles] are not shown; without them two entries would alias the
   same buffer - confirm. */
305 for (i=0;i<poles;i++)
306 iterate[i]=iterate[i+1];
310 for (i=0;i<=poles;i++)
/* Generate ilength points of a trajectory from the fitted ARMA model and
   print them.
   coeff: coefficient rows, one per component, armadim entries each.
   sigma: not used on the visible lines.
   diff:  residuals of the fit; passed to rand_arb_dist() to obtain the
          driving noise.
   file:  output stream for the fprintf() branch. main() passes NULL in its
          stdout section, so presumably NULL selects the printf() branch -
          the test itself is not shown.
   Every state vector has 2*dim entries: entries 0..dim-1 hold the series
   values, entries dim..2*dim-1 hold the noise values of the same time step. */
319 void iterate_arima_model(double **coeff,double *sigma,double **diff,FILE *file)
321 double **iterate,*swap,**myrand;
322 unsigned long i,j,n,is,id;
324 check_alloc(iterate=(double**)malloc(sizeof(double*)*(poles+1)));
325 for (i=0;i<=poles;i++)
326 check_alloc(iterate[i]=(double*)malloc(sizeof(double)*2*dim));
328 check_alloc(myrand=(double**)malloc(sizeof(double*)*dim));
/* One noise sequence of ilength+poles values per component. The trailing
   arguments 100 and 0x44325 are presumably a bin count and a seed - confirm
   against rand_arb_dist(). */
330 myrand[i]=rand_arb_dist(diff[i],length,ilength+poles,100,0x44325);
/* Seed both halves of the history with the first poles noise values. */
336 for (j=0;j<poles;j++)
337 iterate[j][i]=iterate[j][dim+i]=myrand[i][j];
339 for (n=0;n<ilength;n++) {
/* The new point starts as the current noise value, which is also stored in
   the noise half so that later steps can use it as an MA regressor. */
341 iterate[poles][i]=iterate[poles][i+dim]=myrand[i][n+poles];
343 for (j=0;j<dim;j++) {
344 for (i=0;i<armadim;i++) {
/* is/id are presumably read from aindex (not shown); id >= dim addresses the
   noise half of the state vector. */
347 iterate[poles][j] += coeff[j][i]*iterate[poles-1-is][id];
353 fprintf(file,"%e ",iterate[poles][i]);
358 printf("%e ",iterate[poles][i]);
/* Shift the history by moving row pointers instead of copying data.
   NOTE(review): the statements that save iterate[0] in swap and store it back
   into iterate[poles] are not shown - confirm they exist. */
363 for (i=0;i<poles;i++)
364 iterate[i]=iterate[i+1];
368 for (i=0;i<=poles;i++)
/* Program entry point. Visible steps, in order: parse the options, locate the
   input file, derive the output name, read the data, remove the averages, fit
   the initial AR model, optionally refine it with the iterative ARIMA
   procedure, then report coefficients, residuals and/or an iterated model on
   stdout or in the output file.
   NOTE(review): this listing omits many lines of the function (declarations,
   braces, branch conditions, some statements) and the function continues past
   the last line shown; the comments below cover only the visible lines. */
376 int main(int argc,char **argv)
380 long i,j,iter,hj,realiter=0;
381 unsigned int size,is,id;
383 double **mat,**inverse,*vec,**coeff,**diff,**hseries;
384 double **oldcoeff,*diffcoeff=NULL;
385 double hdiff,**xdiff=NULL,avpm;
386 double loglikelihood,aic,alldiff;
/* Print the option summary when help was requested. */
388 if (scan_help(argc,argv))
389 show_options(argv[0]);
391 scan_options(argc,argv);
392 #ifndef OMIT_WHAT_I_DO
393 if (verbosity&VER_INPUT)
394 what_i_do(argv[0],WID_STR);
397 infile=search_datafile(argc,argv,NULL,verbosity);
/* Default output name: the input name followed by ".ari" (strlen+5 bytes cover
   the four-character suffix and the terminator), or "stdin.ari" (nine
   characters plus terminator). The test choosing between the two is not
   shown; presumably it checks whether an input file name was found. */
401 if (outfile == NULL) {
403 check_alloc(outfile=(char*)calloc(strlen(infile)+5,(size_t)1));
404 strcpy(outfile,infile);
405 strcat(outfile,".ari");
408 check_alloc(outfile=(char*)calloc((size_t)10,(size_t)1));
409 strcpy(outfile,"stdin.ari");
413 test_outfile(outfile);
/* Read the data either with an empty column specification or with the one
   given via -c; the selecting test is not shown. length and dim are passed
   by address, so get_multi_series() may update them. */
416 series=(double**)get_multi_series(infile,&length,exclude,&dim,"",dimset,
419 series=(double**)get_multi_series(infile,&length,exclude,&dim,column,
422 check_alloc(my_average=(double*)malloc(sizeof(double)*dim));
/* One pass per integration order; the loop body is not shown (presumably a
   call to make_difference()). */
424 for (i=0;i<ipoles;i++)
431 set_averages_to_zero();
/* Refuse model orders that are not smaller than the number of data points. */
433 if (poles >= length) {
434 fprintf(stderr,"It makes no sense to have more poles than data! Exiting\n");
435 exit(AR_MODEL_TOO_MANY_POLES);
438 if ((arpoles >= length) || (mapoles >= length)) {
439 fprintf(stderr,"It makes no sense to have more poles than data! Exiting\n");
440 exit(AR_MODEL_TOO_MANY_POLES);
/* Initial AR fit: for every component the coefficient row is the inverted
   regressor matrix multiplied by that component's product vector. */
445 aindex=make_ar_index();
447 check_alloc(vec=(double*)malloc(sizeof(double)*ardim));
448 check_alloc(mat=(double**)malloc(sizeof(double*)*ardim));
449 for (i=0;i<ardim;i++)
450 check_alloc(mat[i]=(double*)malloc(sizeof(double)*ardim));
452 check_alloc(coeff=(double**)malloc(sizeof(double*)*dim));
453 inverse=build_matrix(mat,ardim);
454 for (i=0;i<dim;i++) {
455 build_vector(vec,ardim,i);
456 coeff[i]=multiply_matrix_vector(inverse,vec,ardim);
/* Residuals of the AR fit; pm holds one RMS error per component. */
459 check_alloc(diff=(double**)malloc(sizeof(double*)*dim));
461 check_alloc(diff[i]=(double*)malloc(sizeof(double)*length));
463 pm=make_residuals(diff,coeff,ardim);
466 for (i=0;i<ardim;i++) {
/* ARIMA refinement. xdiff[iteration][component] records how much the
   residual estimate changed in each iteration. */
483 check_alloc(xdiff=(double**)malloc(sizeof(double*)*ITER));
485 check_alloc(xdiff[i]=(double*)malloc(sizeof(double)*dim));
487 armadim=(arpoles+mapoles)*dim;
488 aindex=make_arima_index(arpoles,mapoles);
/* hseries stacks the data (rows 0..dim-1) and the AR residuals
   (rows dim..2*dim-1) so that both can serve as regressors. */
491 check_alloc(hseries=(double**)malloc(sizeof(double*)*2*dim));
492 for (i=0;i<dim;i++) {
493 check_alloc(hseries[i]=(double*)malloc(sizeof(double)*length));
494 check_alloc(hseries[i+dim]=(double*)malloc(sizeof(double)*length));
495 for (j=0;j<length;j++) {
496 hseries[i][j]=series[i][j];
497 hseries[i+dim][j]=diff[i][j];
/* The pointer is moved back by ipoles before being freed; presumably
   series[i] was advanced by ipoles after differencing on a line that is not
   shown - confirm. */
502 free(series[i]-ipoles);
507 check_alloc(oldcoeff=(double**)malloc(sizeof(double*)*dim));
508 for (i=0;i<dim;i++) {
509 check_alloc(oldcoeff[i]=(double*)malloc(sizeof(double)*armadim));
510 for (j=0;j<armadim;j++)
513 check_alloc(diffcoeff=(double*)malloc(sizeof(double)*ITER));
/* NOTE(review): vec, mat and coeff are allocated anew in every iteration; the
   matching free() calls are not among the visible lines - confirm that the
   buffers of the previous iteration are released. */
515 for (iter=1;iter<=ITER;iter++) {
516 check_alloc(vec=(double*)malloc(sizeof(double)*armadim));
517 check_alloc(mat=(double**)malloc(sizeof(double*)*armadim));
518 for (i=0;i<armadim;i++)
519 check_alloc(mat[i]=(double*)malloc(sizeof(double)*armadim));
521 check_alloc(coeff=(double**)malloc(sizeof(double*)*dim));
/* From here on the global poles is the larger of the AR and MA orders; the
   helper functions use it as the start of their summation range. */
523 poles=(arpoles > mapoles)? arpoles:mapoles;
526 inverse=build_matrix(mat,armadim);
528 for (i=0;i<dim;i++) {
529 build_vector(vec,armadim,i);
530 coeff[i]=multiply_matrix_vector(inverse,vec,armadim);
533 pm=make_residuals(diff,coeff,armadim);
/* RMS difference between the new residuals and the stored row series[hj]
   (hj is presumably j+dim, the residual row), followed by replacing that row
   with the new residuals. */
535 for (j=0;j<dim;j++) {
538 for (i=offset;i<length;i++)
539 hdiff += sqr(series[hj][i]-diff[j][i]);
540 for (i=0;i<length;i++) {
541 series[hj][i]=diff[j][i];
543 xdiff[iter-1][j]=sqrt(hdiff/(double)(length-offset));
547 for (i=0;i<armadim;i++) {
/* RMS change of the coefficients with respect to the previous iteration.
   NOTE(review): the inner index j only runs up to dim, whereas each row of
   coeff/oldcoeff holds armadim entries, and the divisor below is armadim.
   The outer loop header is not shown - confirm that the bounds really cover
   coeff[0..dim-1][0..armadim-1]. */
554 diffcoeff[iter-1]=0.0;
556 for (j=0;j<dim;j++) {
557 diffcoeff[iter-1] += sqr(coeff[i][j]-oldcoeff[i][j]);
558 oldcoeff[i][j]=coeff[i][j];
560 diffcoeff[iter-1]=sqrt(diffcoeff[iter-1]/(double)armadim);
/* alldiff is the largest residual change over all components; it is compared
   with the -e threshold (the action taken on convergence is not shown). */
561 alldiff=xdiff[iter-1][0];
563 if (xdiff[iter-1][i] > alldiff)
564 alldiff=xdiff[iter-1][i];
566 if (alldiff < convergence)
/* ---- Report on stdout ---- */
579 printf("#convergence of residuals in arima fit\n");
580 for (i=0;i<realiter;i++) {
581 printf("#iteration %ld ",i+1);
583 printf("%e ",xdiff[i][j]);
584 printf("%e",diffcoeff[i]);
/* loglikelihood = -length*sum_i log(pm[i]) - length*dim*(1+log(2*pi))/2 */
589 loglikelihood= -log(pm[0]);
590 for (i=1;i<dim;i++) {
592 loglikelihood -= log(pm[i]);
594 loglikelihood *= ((double)length);
595 loglikelihood += -((double)length)*
596 ((1.0+log(2.*M_PI))*dim)/2.0;
/* NOTE(review): "forcast" in the next output string is a typo for "forecast";
   it is left as is here because it is part of the program output. */
598 printf("#average forcast error= %e\n",avpm);
599 printf("#individual forecast errors: ");
/* Two AIC variants, using arpoles+mapoles or poles as the parameter count;
   the test selecting one of them is not shown. */
604 aic=2.0*(arpoles+mapoles)-2.0*loglikelihood;
606 aic=2.0*poles-2.0*loglikelihood;
607 printf("#Log-Likelihood= %e\t AIC= %e\n",loglikelihood,aic);
/* Coefficient table: one line per regressor, labelled x_ for series terms and
   e_ for residual terms, followed by the coefficient of every component. */
608 for (i=0;i<size;i++) {
612 printf("#x_%u(n-%u) ",id+1,is);
614 printf("#e_%u(n-%u) ",id+1-dim,is);
616 printf("%e ",coeff[j][i]);
/* Residuals are printed unless a model is iterated without VER_USR1; with
   VER_USR2 the original data (average added back) is printed alongside. */
619 if (!run_model || (verbosity&VER_USR1)) {
620 for (i=poles;i<length;i++) {
624 if (verbosity&VER_USR2)
625 printf("%e %e ",series[j][i]+my_average[j],diff[j][i]);
627 printf("%e ",diff[j][i]);
631 if (run_model && (ilength > 0)) {
633 iterate_model(coeff,pm,diff,NULL);
635 iterate_arima_model(coeff,pm,diff,NULL);
/* ---- Same report written to the output file ----
   NOTE(review): the result of fopen() is used without a NULL test on the
   visible lines. test_outfile() was called earlier, but the file could still
   fail to open here - consider checking. */
639 file=fopen(outfile,"w");
640 if (verbosity&VER_INPUT)
641 fprintf(stderr,"Opened %s for output\n",outfile);
643 fprintf(file,"#convergence of residuals in arima fit\n");
644 for (i=0;i<realiter;i++) {
645 fprintf(file,"#iteration %ld ",i+1);
647 fprintf(file,"%e ",xdiff[i][j]);
648 fprintf(file,"%e",diffcoeff[i]);
/* Same log-likelihood expression as in the stdout section. */
653 loglikelihood= -log(pm[0]);
654 for (i=1;i<dim;i++) {
656 loglikelihood -= log(pm[i]);
658 loglikelihood *= ((double)length);
659 loglikelihood += -((double)length)*
660 ((1.0+log(2.*M_PI))*dim)/2.0;
662 fprintf(file,"#average forcast error= %e\n",avpm);
663 fprintf(file,"#individual forecast errors: ");
665 fprintf(file,"%e ",pm[i]);
668 aic=2.0*(arpoles+mapoles)-2.0*loglikelihood;
670 aic=2.0*poles-2.0*loglikelihood;
671 fprintf(file,"#Log-Likelihood= %e\t AIC= %e\n",loglikelihood,aic);
672 for (i=0;i<size;i++) {
676 fprintf(file,"#x_%u(n-%u) ",id+1,is);
678 fprintf(file,"#e_%u(n-%u) ",id+1-dim,is);
680 fprintf(file,"%e ",coeff[j][i]);
683 if (!run_model || (verbosity&VER_USR1)) {
684 for (i=poles;i<length;i++) {
688 if (verbosity&VER_USR2)
689 fprintf(file,"%e %e ",series[j][i]+my_average[j],diff[j][i]);
691 fprintf(file,"%e ",diff[j][i]);
695 if (run_model && (ilength > 0)) {
697 iterate_model(coeff,pm,diff,file);
699 iterate_arima_model(coeff,pm,diff,file);
/* Per-component loop whose body lies beyond the end of this listing. */
707 for (i=0;i<dim;i++) {