Home > 04-Estimation > GMM > Demo-GMM > demos_GMM_regression.m

demos_GMM_regression

PURPOSE ^

% test GMM with linear regression with check

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

% test GMM with linear regression with check

 1. Single example
 2. checking the correctness
 3. the effect of varying noise level
 4. the effect of varying density of observations
 
 Wolfgang Förstner
 wfoerstn@uni-bonn.de

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 %% test GMM with linear regression with check
0002 %
0003 % 1. Single example
0004 % 2. checking the correctness
0005 % 3. the effect of varying noise level
0006 % 4. the effect of varying density of observations
0007 %
0008 % Wolfgang Förstner
0009 % wfoerstn@uni-bonn.de
0010 
0011 close all
0012 clear sigma_x12
0013 
0014 addpath(genpath('../../../General-Functions/'));
0015 addpath(genpath('../Functions-GMM/'));
0016 
0017 
0018 disp('============================================')
0019 disp('============================================')
0020 disp('             Linear regression              ')
0021 disp('            l = x[1] + x[2] * t             ')
0022 disp('--------------------------------------------')
0023 
0024 %% General control parameters
0025 
0026 % choose one or several of the following four parts of the example
0027 %choice=[1,0,0,0];  % single example
0028 %choice=[0,1,0,0];  % checking the correctness%
0029 %choice=[0,0,1,0];  % the effect of varying noise level
0030 %choice=[0,0,0,1];  % the effect of varying density of observations
0031 choice=[1,1,1,1];
0032 
0033 % initialization of random numbers
0034 % = 0 CPU-time dependent
0035 % > 0 fixed
0036 %init_rand  = 0;
0037 init_rand   = 15;     % standard = 15
0038 
0039 %% random number intialization
0040 init_rand_seed(init_rand);
0041 
0042 %%
0043 % unknown parameters
0044 xt  = [0.5,1]';
0045 U   = 2;
0046 disp(['True_parameters = [', num2str(xt'),']'])
0047 
0048 % Significance level
0049 S = 0.99;
0050 
0051 % non-centrality parameter
0052 delta0 = 4;
0053 
0054 %% Individual control parameters
0055 
0056 % 1. for diagnostics: set of parameters to be evaluated
0057 %------------------------------------------------------
0058 rU = 1;  % can be [], 1, 2, or [1,2]; 1 for offset, 2 for scale
0059 
0060 
0061 % 2. for checking the result
0062 % --------------------------
0063 % type of generated data
0064 % data_type = -10;  % 10 random position in 1:10
0065 data_type2 =   1;  % 4 observations with leverage point at i=4;
0066 
0067 % number of samples for estimation
0068 K2 = 25;
0069 
0070 
0071 % 3. for varying sigma
0072 % --------------------
0073 % type of generated data
0074 % data_type = -10;  % 10 random position in 1:10
0075 data_type3 =   1;  % 4 observations with leverage point at i=4;
0076 
0077 % varying sigmas
0078 sigmas = [0.01,0.05,0.1,0.15,0.20];
0079 
0080 % number of samples per sigma
0081 K3 = 25;
0082 
0083 
0084 % 4. for varying the density/number N of observations
0085 % ---------------------------------------------------
0086 
0087 % varying N's
0088 %Ns = [5,10,20,40,80,160,320];
0089 Ns = round(exp(1+2*log(2):0.5*log(2):7)/exp(1));
0090 
0091 % number of samples per N
0092 K4 = 5;
0093 
0094 
0095 
0096 
0097 %% 1. Single example #########################################################
0098 if choice(1) == 1
0099     disp('============================================')
0100     disp('         Example with evaluation            ')
0101     disp('............................................')
0102 
0103     % initializing random numbers-------------------------------------------
0104     
0105     init_rand_seed(init_rand);
0106     
0107     
0108     if ~isempty(rU)
0109         if rU(1) == 1
0110             disp('parameters_to_be_evaluated = intercept')
0111         else
0112             disp('parameters_to_be_evaluated = slope')
0113         end
0114     end
0115     
0116     % times of observation ------------------------------------------------
0117     tv    = [-1,1,2,14]';
0118     N   = length(tv);
0119     disp(['Number of observations = ', num2str(N)])%
0120     Am  = [ones(N,1), tv];
0121     av  = zeros(N,1);
0122     
0123     % stochastical model --------------------------------------------------
0124     sigma= 0.5;
0125     Cov_ll = sigma^2 * eye(N);
0126     
0127     % observations --------------------------------------------------------
0128     true_error =randn(N,1)*sigma;
0129     lv = Am*xt+true_error;
0130     % add outlier err(4)=+4 in 4th observation
0131     lv(4)=lv(4)-4;
0132     
0133     disp('    no         t        tl        te         l')
0134     disp([(1:4)',tv,Am*xt,lv-Am*xt,lv ])
0135    
0136     % estimate parameters -------------------------------------------------
0137     start=cputime;
0138     [est_x,Cov_xx,sigma_0q,R,vv,zv,riv,nabla_lv,muv,muv1,uv1q,uv2]=...
0139         GaussMarkovModelLinear(lv,Cov_ll,Am,av,rU);
0140     CPU_time = (cputime-start)                                              %#ok<*NOPTS>
0141     
0142     warning('off','all');
0143     
0144     % --------------- output and diagnostics ------------------------------
0145     ScrS = plot_init;  
0146     f1 = figure('Name','Regression and Diagnostic','Color','w','Position',[20 ScrS(2)-0.7*ScrS(2)-100 0.6*ScrS(1) 0.7*ScrS(2)]);
0147     subplot(2,4,1)
0148     hold on
0149     plot(tv,lv,'.r','Markersize',12)       % given data
0150     plot([min(tv),max(tv)],[[1,min(tv)]*est_x,[1,max(tv)]*est_x],'-b')
0151     xlim([min(tv)-1,max(tv)+1])
0152     title({'linear regression', strcat('$\sigma_{l} = ',num2str(sigma),'$')})
0153     
0154     xlabel('t')
0155     
0156     
0157     subplot(2,4,2)
0158     hold on
0159     plot(tv,vv,'.r')
0160     plot([min(tv),max(tv)],[0,0],'-b')
0161     title({'residuals', strcat('max($v_i$) = ',num2str(max(abs(vv))))})
0162     xlim([min(tv)-1,max(tv)+1])
0163     
0164     xlabel('t')
0165     
0166     subplot(2,4,3)
0167     hold on
0168     plot(tv,riv,'.r')
0169     title({'redundancy number', strcat('min($r_i$) = ',num2str(min(riv)))})
0170     xlim([min(tv)-1,max(tv)+1])
0171     
0172     xlabel('t')
0173     
0174     subplot(2,4,4)
0175     hold on
0176     est_e=-vv./riv;
0177     plot(tv,est_e,'.r')
0178     plot([min(tv),max(tv)],[0,0],'-b')
0179     title({'estimated error', strcat('$\max(|\hat{e}_i|)) = $',num2str(max(abs(est_e))))})
0180     xlim([min(tv)-1,max(tv)+1])
0181     ylim([min(est_e)-1,max(est_e)+1])
0182     xlabel('t')
0183     
0184     
0185     
0186     subplot(2,4,5)
0187     hold on
0188     plot(tv,nabla_lv,'.r')
0189     title({'min detectable outliers', strcat('$\max(\nabla_{{0l}_i}) = $',...
0190         num2str(max(nabla_lv)))})
0191     xlim([min(tv)-1,max(tv)+1])
0192     xlabel('t')
0193     
0194     
0195     subplot(2,4,6)
0196     hold on
0197     plot(tv,zv,'.r')
0198     plot([min(tv),max(tv)],[2.58,2.58],'-r')
0199     plot([min(tv),max(tv)],-[2.58,2.58],'-r')
0200     title({'test statistics', strcat('max($z_i$) = ',num2str(max(abs(zv))))})
0201     xlim([min(tv)-1,max(tv)+1])
0202     xlabel('t')
0203     
0204     
0205     
0206     subplot(2,4,7)
0207     hold on
0208     plot(tv,muv,'.r')
0209     title({'sensitivity factors', strcat('max($\mu_i$) = ',num2str(max(muv)))})
0210     xlim([min(tv)-1,max(tv)+1])
0211     xlabel('t')
0212     
0213     
0214     %
0215     
0216     disp('........................................................')
0217     disp('                      diagnostics                       ')
0218     disp('........................................................')
0219     disp(strcat('    no         t         error     v         r     est_error',...
0220             '     z         z*   nabla_0l  nabla_0^*l' ));
0221     o=[(1:N)', tv, lv-Am*xt, vv, riv, -vv./riv, ...
0222         zv, zv.*sqrt(riv), delta0*sigma./sqrt(riv), delta0*sigma./riv ]
0223     
0224     Estimated_parameters      = est_x'
0225     Theoretical_precision     = sqrt(diag(Cov_xx))'
0226     Redundancy                = R
0227     Estimated_sigma_0         = sqrt(sigma_0q)
0228     
0229     [m,i]=max(abs(true_error));
0230     disp(horzcat('Maximal true error                 = ',num2str(m,'% 12.5f'),...
0231         ' at: ', num2str(i,'% 3.0f')));
0232     [m,i]=max(abs(vv));
0233     disp(horzcat('Maximal residual                   = ',num2str(m,'% 12.5f'),...
0234         ' at: ', num2str(i,'% 3.0f')));
0235     [m,i]=min(riv);
0236     disp(horzcat('Minimal redundancy number          = ',num2str(m,'% 12.5f'),...
0237         ' at: ', num2str(i,'% 3.0f')));
0238     
0239     [m,i]=max(abs(zv));
0240     if max(abs(zv)) > norminv(S,0,1)
0241         disp(horzcat('Maximal test statistic             = ',num2str(m,'% 12.5f'),...
0242             ' at: ', num2str(i,'% 3.0f')), ' ***');
0243     else
0244         disp(horzcat('Maximal test statistic             = ',num2str(m,'% 12.5f'),...
0245             ' at: ', num2str(i,'% 3.0f')));
0246     end
0247     
0248     [m,i]=max(nabla_lv);
0249     disp(horzcat('Max. of minimal detectable outlier = ',num2str(m,'% 12.5f'),...
0250         ' at: ', num2str(i,'% 3.0f')));
0251     
0252     [m,i]=max(muv);
0253     disp(horzcat('Max. sensitivity factor            = ',num2str(m,'% 12.5f'),...
0254         ' at: ', num2str(i,'% 3.0f')));
0255     
0256     if ~isempty(rU)
0257         [m,i]=max(muv1);
0258         disp(horzcat('Max. sensitivity factor selected   = ',num2str(m,'% 12.5f'),...
0259             ' at: ', num2str(i,'% 3.0f')));
0260     end
0261     
0262     
0263     % show sensitivity factors --------------------------------------------
0264     m=2.995;range=0:m/200:m;
0265     N=10;
0266     mux=sqrt((1+range.^2)./(N-(1+range.^2)));
0267     mu1=sqrt(1./(N-(1+range.^2)));
0268     mu2=sqrt(range.^2./(N-(1+range.^2)));
0269     
0270     f2 = figure('Name','Sensitivity factor mu_x','Color','w','Position',[30+0.35*ScrS(1) ScrS(2)-0.4*ScrS(2)-110 0.3*ScrS(1) 0.4*ScrS(2)]);
0271     plot(range,mux,'-r','LineWidth',2);
0272     title('sensitivity factor $\mu_x$ = f(distance)')
0273     xlabel('$d$')
0274     ylabel('$\mu_x$')
0275     ylim([0,5])
0276     
0277     f3 = figure('Name','Sensitivity factor mu_offset','Color','w','Position',[0.7*ScrS(1) ScrS(2)-0.4*ScrS(2)-110 0.3*ScrS(1) 0.4*ScrS(2)]);
0278     hold on
0279     plot(range,mu1,'-r','LineWidth',2);
0280     plot(range,mux,'-g','LineWidth',2);
0281     title('sensitivity factor $\mu_{\mbox{offset}}$ (red) and $\mu_x$ (green) = f(distance)')
0282     ylim([0,5])
0283     xlabel('$d$')
0284     ylabel('$\mu_1,\ \mu_x$')
0285     ylim([0,5])
0286     
0287     f4 = figure('Name','Sensitivity factor mu_offset','Color','w','Position',[30 20 0.3*ScrS(1) 0.4*ScrS(2)]);
0288     hold on
0289     plot(range,mu2,'-r','LineWidth',2);
0290     plot(range,mux,'-g','LineWidth',2);
0291     title('sensitivity factor $\mu_{scale}$ (red) and $\mu_x$ (green) = f(distance)')
0292     ylim([0,5])
0293     xlabel('$d$')
0294     ylabel('$\mu_2,\ \mu_x$       ')
0295     ylim([0,5])
0296     
0297 end
0298 
0299 
0300 
0301 %% 2. checking the correctness
0302 if choice(2) == 1
0303     
0304     disp('========================================================')
0305     disp('             checking the correctness                   ')
0306     disp('........................................................')
0307     
0308     % initializing random numbers -----------------------------------------
0309     % the next two lines can be exchanged
0310     init_rand_seed(init_rand)
0311       
0312     % K: number of samples ------------------------------------------------
0313     K=K2;  % relative accuracy is sqrt(1/K)
0314     
0315     
0316     switch data_type2
0317         case 1
0318             tv = [-1,1,2,14]';
0319         otherwise
0320             tv = rand(10,1)*10;
0321             tv = sort(tv);
0322             % sigma  = 1.5;
0323     end
0324     
0325     N   = length(tv);
0326     disp(['Number of observations = ', num2str(N)])
0327     Am  = [ones(N,1), tv];
0328     av  = zeros(N,1);
0329     
0330     % stochastical model --------------------------------------------------
0331     sigma= 0.5;
0332     Cov_ll = sigma^2 * eye(N);
0333     
0334     % K samples
0335     est_xs   = zeros(K,2);
0336     est_s0qs = zeros(K,1);
0337     start=cputime;
0338     for k = 1:K
0339         true_error = randn(N,1)*sigma;
0340         lv = Am*xt+true_error;
0341         
0342         % estimate parameters
0343         
0344         [est_x,Cov_xx,sigma_0q,R,vv,zv,riv,nabla_lv,muv,muv1,uv1q,uv2]=...
0345             GaussMarkovModelLinear(lv,Cov_ll,Am,av,rU);
0346         est_s0qs(k)   = sigma_0q;
0347         est_xs(k,:)   = est_x';
0348     end
0349     
0350     CPU_time_per_sample=(cputime-start)/K
0351     
0352     warning('off','all');
0353     
0354     % output and diagnostics ----------------------------------------------
0355        
0356     f5 = check_estimation_result(R,xt,Cov_xx,est_s0qs,est_xs,S,'linear regression');
0357     set(f5,'Name','Variance Factors','Position',[30+0.35*ScrS(1) 20 0.3*ScrS(1) 0.4*ScrS(2)])
0358     
0359 end
0360 
0361 %% 3. the effect of varying noise level
0362 
0363 if choice(3) == 1
0364     clear tv 
0365     disp('========================================================') 
0366     disp('          effect of varying noise level                 ')
0367     disp('........................................................')
0368     % initializing random numbers -----------------------------------------
0369     init_rand_seed(init_rand)
0370     
0371     switch data_type3
0372         case 1
0373             tv    = [-1,1,2,14]';
0374         otherwise
0375             tv=rand(10,1)*10;
0376             tv=sort(tv);
0377             sigma  = 1.5;
0378     end
0379     
0380     N   = length(tv);
0381     disp(['Number of observations = ', num2str(N)])
0382     Am  = [ones(N,1), tv];
0383     av  = zeros(N,1);
0384     
0385     
0386     K = K3;
0387     disp(['Number of samples per sigma = ', num2str(K)])
0388     
0389     
0390     disp(['sigma''s = [', num2str(sigmas),']'])
0391     L = length(sigmas);
0392     
0393     % for all sigmas
0394     sigma_x12 = zeros(L,2);
0395     for l=1:L
0396         sigma= sigmas(l);
0397         Cov_ll = sigma^2 * eye(N);
0398         %% generate K samples and estimate
0399         est_xs   = zeros(K,2);
0400         est_s0qs = zeros(K,1);
0401         
0402         % for all samples
0403         start=cputime;
0404         for k=1:K
0405             
0406             true_error = randn(N,1)*sigma;
0407             lv = Am*xt+true_error;
0408             
0409             % estimate parameters
0410             
0411             [est_x,Cov_xx,sigma_0q,R,vv,zv,riv,nabla_lv,muv,muv1,uv1q,uv2]=...
0412                 GaussMarkovModelLinear(lv,Cov_ll,Am,av,rU);
0413             est_s0qs(k)   = sigma_0q;
0414             est_xs(k,:)   = est_x';
0415         end
0416         
0417         warning('off','all');
0418         
0419         Cov_est_x_emp = cov(est_xs);
0420         %  set sigmas
0421         sigma_x12(l,:)=...
0422             [sqrt(Cov_est_x_emp(1,1)),sqrt(Cov_est_x_emp(2,2))];
0423         
0424     end
0425     
0426     % plot dependencies
0427     f6 = figure('Name','Effect of varying noise level ','Color','w','Position',[30 20 0.6*ScrS(1) 0.7*ScrS(2)]);
0428     subplot(2,2,1)
0429     hold on
0430     plot(sigmas,sigmas/sigmas(L)*sqrt(Cov_xx(1,1))','--b','Linewidth',2);
0431     plot(sigmas,sigma_x12(:,1)','-r','Linewidth',2);
0432     plot(sigmas,sigma_x12(:,1)','.k','MarkerSize',15);
0433     title('$\sigma_1(\sigma)$')
0434     xlabel('$\sigma$')
0435     subplot(2,2,2)
0436     hold on
0437     plot(sigmas,sigmas/sigmas(L)*sqrt(Cov_xx(2,2))','--b','Linewidth',2);
0438     plot(sigmas,sigma_x12(:,2)','-r','Linewidth',2);
0439     plot(sigmas,sigma_x12(:,2)','.k','MarkerSize',15);
0440     title('$\sigma_2(\sigma)$')
0441     xlabel('$\sigma$')
0442     subplot(2,2,3)
0443     hold on
0444     plot(sigmas,ones(1,L)./sigmas(L)*sqrt(Cov_xx(1,1))','--b','Linewidth',2);
0445     plot(sigmas,sigma_x12(:,1)./sigmas','-r','Linewidth',2);
0446     plot(sigmas,sigma_x12(:,1)./sigmas','.k','MarkerSize',15);
0447     title('$\sigma_1(\sigma)/\sigma$')
0448     ylim([0,1.2*max(sigma_x12(:,1)./sigmas')])
0449     xlabel('$\sigma$')
0450     subplot(2,2,4)
0451     hold on   
0452     plot(sigmas,ones(1,L)./sigmas(L)*sqrt(Cov_xx(2,2))','--b','Linewidth',2);
0453     plot(sigmas,sigma_x12(:,2)./sigmas','-r','Linewidth',2);
0454     plot(sigmas,sigma_x12(:,2)./sigmas','.k','MarkerSize',15);
0455     title('$\sigma_2(\sigma)/\sigma$')
0456     ylim([0,1.2*max(sigma_x12(:,2)./sigmas')])
0457     xlabel('$\sigma$')
0458     
0459 end
0460 
0461 %% 4. the effect of varying density of observations
0462 
0463 if choice(4) ==1
0464     
0465     disp('========================================================')
0466     disp('     effect of varying density of observations          ')
0467     disp('........................................................')
0468     
0469     % initializing random numbers -----------------------------------------
0470     % the next two lines can be exchanged
0471     init_rand_seed(init_rand)
0472     
0473     disp(strcat('Ns = [',num2str(Ns),']'))
0474 
0475     L = length(Ns);
0476     K = K4;
0477     
0478     disp(['Number of samples per N and estimates = ', num2str(K)])
0479 
0480     % sigma
0481     sigma=0.02;
0482     
0483     % needs to be set ...
0484     rU=1;
0485     
0486     lm=0;
0487     % for all Ns
0488     for l=1:L
0489         N=Ns(l);
0490         
0491         % also repeat sqrt(K) times Ns(l)
0492         v1=0;
0493         v2=0;
0494         for m=1:K
0495             
0496             % sorted observations in interval [0..100]
0497             %tv=(rand(N,1))*100;
0498             tv=(1/(2*N):100/(N):100-1/(2*N))';
0499             tv = sort(tv);                                                  %#ok<TRSRT>
0500             Am  = [ones(N,1), tv];
0501             av  = zeros(N,1);
0502             
0503             Cov_ll = sigma^2 * eye(N);
0504             
0505             % generate K samples and estimate
0506             est_xs   = zeros(K,2);
0507             est_s0qs = zeros(K,1);
0508             
0509             % for all samples
0510             start=cputime;
0511             for k=1:K
0512                 
0513                 true_error = randn(N,1)*sigma;
0514                 lv         = Am*xt + true_error;
0515                 
0516                 % estimate parameters
0517                 
0518                 [est_x,Cov_xx,sigma_0q,R,vv,zv,riv,nabla_lv,muv,muv1,uv1q,uv2]=...
0519                     GaussMarkovModelLinear(lv,Cov_ll,Am,av,rU);
0520                 est_s0qs(k)   = sigma_0q;
0521                 est_xs(k,:)   = est_x';
0522             end
0523             
0524             warning('off','all');
0525             
0526             Cov_est_x_emp = cov(est_xs);
0527             %  set sigmas
0528             v1=v1+Cov_est_x_emp(1,1);
0529             v2=v2+Cov_est_x_emp(2,2);
0530             
0531         end
0532         sigma_x12(l,:)=sqrt([v1/K,v2/K]);
0533     end
0534     
0535     % plot dependencies
0536     f7 = figure('Name','Effect of varying density of observations ','Color','w','Position',[ScrS(1)-0.6*ScrS(1) 20 0.6*ScrS(1) 0.7*ScrS(2)]);
0537     subplot(2,2,1)
0538     hold on
0539     plot(Ns,sqrt(Cov_xx(1,1))./sqrt(Ns/Ns(L))','--b','Linewidth',2);
0540     plot(Ns,sigma_x12(:,1)','-r','Linewidth',2);
0541     plot(Ns,sigma_x12(:,1)','.k','MarkerSize',15);
0542     title('$\sigma_1(N)$')
0543     xlabel('$N$')
0544     subplot(2,2,2)
0545     hold on
0546     plot(Ns,sqrt(Cov_xx(2,2))./sqrt(Ns/Ns(L))','--b','Linewidth',2);
0547     plot(Ns,sigma_x12(:,2)','-r','Linewidth',2);
0548     plot(Ns,sigma_x12(:,2)','.k','MarkerSize',15);
0549     title('$\sigma_2(N)$')
0550     xlabel('N')
0551     subplot(2,2,3)
0552     hold on
0553     plot(Ns,sqrt(Cov_xx(1,1))./sqrt(1/Ns(L))*ones(1,L),'--b','Linewidth',2);
0554     plot(Ns,sqrt(Ns').*sigma_x12(:,1),'-r','Linewidth',2);
0555     plot(Ns,sqrt(Ns').*sigma_x12(:,1),'.k','MarkerSize',15);
0556     title('$\sqrt{N} \sigma_1(N)$')
0557     xlabel('$N$')
0558     ylim([0,1.2*max(sqrt(Ns').*sigma_x12(:,1))])
0559     subplot(2,2,4)
0560     xlabel('$N$')
0561     hold on
0562     plot(Ns,sqrt(Cov_xx(2,2))./sqrt(1/Ns(L))*ones(1,L),'--b','Linewidth',2);
0563     plot(Ns,sqrt(Ns').*sigma_x12(:,2),'-r','Linewidth',2);
0564     plot(Ns,sqrt(Ns').*sigma_x12(:,2),'.k','MarkerSize',15);
0565     title('$\sqrt{N} \sigma_2(N)$')
0566     ylim([0,1.2*max(sqrt(Ns').*sigma_x12(:,2))])
0567     xlabel('$N$')
0568     
0569 end
0570 
0571 figure(f1)
0572 figure(f2)
0573 figure(f3)
0574 figure(f6)
0575 figure(f7)
0576 figure(f4)
0577 figure(f5)

Generated on Sat 21-Jul-2018 20:56:10 by m2html © 2005