sera-applications/SHAPE_Package/SHAPE_ver2b.0/SSH/Nonpar_tr_O.m
2020-06-08 13:02:54 +02:00

507 lines
16 KiB
Matlab
Executable File

% [lamb_all,lamb,lamb_err,unit,eps,ierr,h,xx,ambd,Mmax,err,BIAS,SD]=
% Nonpar_tr(t,M,iop,Mmin,Mmax,Nsynth)
%
% BASED ON MAGNITUDE SAMPLE DATA M DETERMINES THE ROUND-OFF INTERVAL LENGTH
% OF THE MAGNITUDE DATA - eps, THE SMOOTHING FACTOR - h, CONSTRUCTS
% THE BACKGROUND SAMPLE - xx, CALCULATES THE WEIGHTING FACTORS - amb, AND
% THE END-POINT OF MAGNITUDE DISTRIBUTION Mmax FOR A USE OF THE NONPARAMETRIC
% ADAPTATIVE KERNEL ESTIMATORS OF MAGNITUDE DISTRIBUTION UNDER THE
% ASSUMPTION OF THE EXISTENCE OF THE UPPER LIMIT OF MAGNITUDE DISTRIBUTION.
%
% !! THIS FUNCTION MUST BE EXECUTED AT START-UP OF THE UPPER-BOUNDED
% NON-PARAMETRIC HAZARD ESTIMATION MODE !!
%
% AUTHOR: S. Lasocki ver 2 01/2015 within IS-EPOS project.
%
% DESCRIPTION: The kernel estimator approach is a model-free alternative
% to estimating the magnitude distribution functions. The smoothing factor
% h, is estimated using the least-squares cross-validation for the Gaussian
% kernel function. The final form of the kernel is the adaptive kernel.
% In order to avoid repetitions, which cannot appear in a sample when the
% kernel estimators are used, the magnitude sample data are randomized
% within the magnitude round-off interval. The round-off interval length,
% eps, is the least non-zero difference between sample data, or 0.1 if that
% difference is greater than 0.1. The randomization is done
% assuming exponential distribution of m in [m0-eps/2, m0+eps/2], where m0
% is the sample data point and eps is the length of the round-off interval. The
% shape parameter of the exponential distribution is estimated from the whole
% data sample assuming the exponential distribution. The background sample
% - xx comprises the randomized values of magnitude doubled symmetrically
% with respect to the value Mmin-eps/2, so length(xx) is twice the number
% of events with M>=Mmin. The weighting factors for the adaptive kernel
% form a row vector of the same length as xx.
% The mean activity rate, lamb, is the number of events >=Mmin into the
% length of the period in which they occurred.
% The upper limit of the distribution Mmax is evaluated using
% the Kijko-Sellevoll generic formula. If convergence is not reached the
% Robson & Whitlock simplified formula is used:
% Mmaxest = 2*(max obs M) - (second max obs M).
%
% See: the references below for a more comprehensive description.
%
% This is a beta version of the program. Further developments are foreseen.
%
% REFERENCES:
%Silverman B.W. (1986) Density Estimation for Statistics and Data Analysis,
% Chapman and Hall, London
%Kijko A., Lasocki S., Graham G. (2001) Pure appl. geophys. 158, 1655-1665
%Lasocki S., Orlecka-Sikora B. (2008) Tectonophysics 456, 28-37
%Kijko, A., and M.A. Sellevoll (1989) Bull. Seismol. Soc. Am. 79, 3,645-654
%Lasocki, S., Urban, P. (2011) Acta Geophys 59, 659-673,
% doi: 10.2478/s11600-010-0049-y
%
% INPUT:
% t - vector of earthquake occurrence times
% M - vector of earthquake magnitudes (sample data)
% iop - determines the used unit of time. iop=0 - 'day', iop=1 - 'month',
% iop=2 - 'year'
% Mmin - lower bound of the distribution - catalog completeness level
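% Mmax - upper limit of magnitude distribution. Can be set by the user, or
% estimated within the program - it then should be set as Mmax=[].
% Nsynth - (optional) number of synthetic datasets used for the Mmax bias
% estimation, e.g. 10000. If omitted or empty, BIAS and SD are returned
% as 0. The bias is only evaluated when Mmax is estimated (Mmax=[]).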
%
% OUTPUT
% lamb_all - mean activity rate for all events
% lamb - mean activity rate for events >= Mmin
% lamb_err - error parameter on the number of events >=Mmin. lamb_err=0
% for 50 or more events >=Mmin and the parameter estimation is
% continued, lamb_err=1 otherwise, all output parameters except
% lamb_all and lamb are set to zero and the function execution is
% terminated.
% unit - string with name of time unit used ('year' or 'month' or 'day').
% eps - length of round-off interval of magnitudes.
% ierr - h-convergence indicator. ierr=0 if the estimation procedure of
% the optimal smoothing factor has converged (a zero of the h functional
% has been found), ierr=1 when multiple zeros of the h functional were
% encountered - the largest h is accepted, ierr=2 when the h functional
% has no zero - the approximate h value is taken.
% h - kernel smoothing factor.
% xx - the background sample for the nonparametric estimators of magnitude
% distribution
% ambd - the weighting factors for the adaptive kernel
% Mmax - upper limit of magnitude distribution
% err - error parameter on Mmax estimation, err=0 - convergence, err=1 -
% no convergence of the Kijko-Sellevoll estimator, Robson & Whitlock
% method used.
% BIAS - Mmax estimation Bias (Lasocki and Urban, 2011)
% SD - Mmax standard deviation (Lasocki and Urban, 2011)
%
% LICENSE
% This file is a part of the IS-EPOS e-PLATFORM.
%
% This is free software: you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the Free
% Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
function [lamb_all,lamb,lamb_err,unit,eps,ierr,h,xx,ambd,Mmax,err,BIAS,SD]=...
    Nonpar_tr_Ob(t,M,iop,Mmin,Mmax,Nsynth)
% Start-up routine of the upper-bounded non-parametric hazard estimation.
%
% Computes the activity rates, the magnitude round-off interval, the
% randomized and mirrored background sample, the kernel smoothing factor,
% the adaptive-kernel weights and (optionally) the upper magnitude limit
% together with its bias.
%
% INPUT
%   t      - vector of occurrence times (passed to datevec in time_diff)
%   M      - vector of magnitudes, same ordering as t
%   iop    - time unit selector: 0 - 'day', 1 - 'month', otherwise 'year'
%   Mmin   - lower magnitude bound (catalog completeness level)
%   Mmax   - upper magnitude limit; pass [] (or omit) to estimate it here
%   Nsynth - optional number of synthetic datasets for the Mmax bias
%            estimation; omit or pass [] to skip it
%
% OUTPUT
%   lamb_all - activity rate of all events over [t(1), t(end)]
%   lamb     - activity rate of events >=Mmin over the period spanned by
%              the first and the last event >=Mmin
%   lamb_err - 1 when fewer than 50 events are >=Mmin (early return with
%              eps, ierr, h, Mmax, err set to 0, xx and ambd set to NaN and
%              BIAS, SD set to NaN), 0 otherwise
%   unit     - name of the time unit ('day', 'month' or 'year')
%   eps      - magnitude round-off interval length (shadows the built-in
%              eps inside this function; kept because it is an output name)
%   ierr, h  - convergence flag and value of the smoothing factor (see hopt)
%   xx       - background sample (column vector)
%   ambd     - adaptive kernel weights (row vector)
%   Mmax,err - upper limit and convergence flag (see Mmaxest); err=0 when
%              Mmax was supplied by the caller
%   BIAS,SD  - Mmax bias and standard deviation; 0 when Nsynth is empty,
%              [] when Nsynth is given but Mmax was supplied by the caller

% Optional arguments. The original test (nargin==5) left Mmax undefined
% when fewer than five arguments were passed.
if nargin<6; Nsynth=[]; end
if nargin<5; Mmax=[]; end

% Degenerate catalog: substitute a two-event placeholder so that the call
% ends through the "too few events" branch below.
% FIX: the '||' before the last test was missing, so that test was executed
% as a statement instead of being part of the condition.
if isempty(t) || numel(t)<3 || isempty(M(M>=Mmin))
    t=[1 2];
    M=[1 2];
end

lamb_err=0;
xx=NaN;
ambd=NaN;
n=length(M);

% Events at or above the completeness level.
% FIX: the original backward scan used 'for i=n:1', which never iterates
% for n>1, so the end of the period was always t(n). The forward scan also
% indexed t(n+1) when no event reached Mmin.
sel=find(M>=Mmin);
nn=numel(sel);

% Activity rate of the whole catalog.
[NM,unit]=time_diff(t(1),t(n),iop);
lamb_all=n/NM;

% Activity rate of the complete part, over the period it actually spans.
if nn>0
    [NM,unit]=time_diff(t(sel(1)),t(sel(end)),iop);
    lamb=nn/NM;
else
    lamb=0; % no event >=Mmin, the rate is zero
end

% Not enough data for the kernel estimation.
if nn<50
    eps=0; ierr=0; h=0; Mmax=0; err=0;
    lamb_err=1;
    BIAS=NaN; SD=NaN;
    return;
end

eps=magn_accur(M);

% Complete part of the sample as a sorted column vector.
x=M(sel);
x=sort(x(:));

% Shape parameter of the exponential distribution fitted to the sample,
% used to randomize magnitudes inside their round-off interval.
beta=1/(mean(x)-Mmin+eps/2);
xx=sort(korekta(x,Mmin,eps,beta));

% Mirror the sample about Mmin-eps/2, then fit the kernel parameters.
xx=podwajanie(xx,Mmin-eps/2);
[h,ierr]=hopt(xx);
ambd=scaling(xx,h);

% Upper limit: estimated here unless supplied by the caller.
MmaxEstimated=isempty(Mmax);
if MmaxEstimated
    [Mmax,err]=Mmaxest(xx,h,Mmin-eps/2);
else
    err=0;
end

% Mmax bias (Lasocki and Urban, 2011, doi:10.2478/s11600-010-0049-y).
if isempty(Nsynth)
    BIAS=0; SD=0;
elseif MmaxEstimated
    [BIAS,SD]=Mmax_Bias_GR(t,M,Mmin,Mmax,err,h,xx,ambd,Nsynth);
else
    warning('Mmax must be empty for BIAS calculation');
    BIAS=[]; SD=[];
end
end
function [NM,unit]=time_diff(t1,t2,iop)
% Length of the period from t1 to t2 in days, months or years.
%
% t1   - start time (MATLAB serial date number)
% t2   - end time (MATLAB serial date number), t2>=t1
% iop  - unit selector: 0 - 'day', 1 - 'month', any other value - 'year'
%
% NM   - number of time units between t1 and t2
% unit - name of the unit used ('day', 'month' or 'year')
%
% For months and years only the calendar date (year, month, day) of each
% time is used; a partial month/year counts as the fraction of its days.

if iop==0
    unit='day';
    NM=(t2-t1);
    return;
end

startVec=datevec(t1);
endVec=datevec(t2);

if iop==1
    unit='month';
    % day-of-month expressed as a fraction of the month it belongs to
    endFrac=endVec(3)/eomday(endVec(1),endVec(2));
    startFrac=startVec(3)/eomday(startVec(1),startVec(2));
    NM=endFrac+endVec(2)+12-startVec(2)-startFrac...
        +(endVec(1)-startVec(1)-1)*12;
    return;
end

unit='year';

% fraction of the final year elapsed up to t2
endDays=endVec(3);
for mth=1:endVec(2)-1
    endDays=endDays+eomday(endVec(1),mth);
end
if eomday(endVec(1),2)==29
    endYearLen=366;
else
    endYearLen=365;
end
endPart=endDays/endYearLen;

% fraction of the first year remaining after t1
startDays=startVec(3);
for mth=1:startVec(2)-1
    startDays=startDays+eomday(startVec(1),mth);
end
if eomday(startVec(1),2)==29
    startYearLen=366;
else
    startYearLen=365;
end
startPart=(startYearLen-startDays)/startYearLen;

NM=endPart+startPart+endVec(1)-startVec(1)-1;
end
function [m_corr]=korekta(m,Mmin,eps,beta)
% Randomizes magnitudes within their round-off interval.
%
% Each value m(i) is replaced by a random draw from the exponential
% distribution (shape beta, origin Mmin) restricted to
% [m(i)-eps/2, m(i)+eps/2], using inverse-CDF sampling.
%
% m      - input magnitudes
% Mmin   - catalog completeness level
% eps    - round-off interval length of the magnitudes
% beta   - parameter of the unbounded exponential distribution
%
% m_corr - randomized magnitudes, same size as m

halfWidth=0.5*eps;

% exponential CDF at the lower and upper edge of each round-off interval
cdfLow=1-exp(-beta*(m-Mmin-halfWidth));
cdfHigh=1-exp(-beta*(m-Mmin+halfWidth));

% uniform draw mapped onto [cdfLow, cdfHigh], then inverted
u=rand(size(m));
w=u.*(cdfHigh-cdfLow)+cdfLow;
m_corr=Mmin-log(1-w)./beta;
end
function x2 = podwajanie(x,x0)
% Doubles a sample by mirroring it about x0.
%
% Intended for a sample truncated at x0 (from the left, x in [x0,inf), or
% from the right, x in (-inf,x0]): every x(i) gets the reflected companion
% 2*x0-x(i).
%
% x  - column data vector
% x0 - reflection point
% x2 - column vector holding the reflected and the original values,
%      sorted in ascending order
mirrored=2*x0-x;
x2=sort([mirrored; x]);
end
function [h,ierr]=hopt(x)
% Optimal kernel smoothing factor by the least-squares method.
%
% x    - column data vector
% h    - smoothing factor
% ierr - 0: a single zero of the functional 'funct' was found,
%        1: several zeros were found, the largest one is returned,
%        2: no usable zero, the approximate formula is used instead
%
% The zero of 'funct' is located with fzero on a sign-changing bracket.
% Up to six further brackets just above the latest zero are then probed
% for additional zeros.
ierr=0;
n=length(x);
x=sort(x);
target=@(s) funct(s,x);

lo=0.000001;
hi=2*std(x)/n^0.2;
if target(lo)*target(hi)<0
    [zerosFound(1),fval,exitflag]=fzero(target,[lo hi]);

    % Extra zeros search: brackets start 10% above the latest zero and
    % widen with each pass.
    nz=1;
    for kk=2:7
        lo=1.1*zerosFound(nz);
        hi=lo+(kk-1)*zerosFound(nz);
        if target(lo)*target(hi)<0
            nz=nz+1;
            [zerosFound(nz),fval,exitflag]=fzero(target,[lo hi]);
        end
    end
    if nz>1
        ierr=1;
    end
    h=max(zerosFound);
    % accept the result only if the last fzero call reported convergence
    if exitflag==1
        return;
    end
end

% Fallback: approximate smoothing factor.
h=0.891836*(mean(x)-x(1))/(n^0.2);
ierr=2;
end
function [fct]=funct(t,x)
% Functional whose zero in t gives the smoothing factor searched by hopt.
%
% t   - trial smoothing factor (scalar)
% x   - data vector
% fct - value of the functional: a double sum over all pairs (i,j) of a
%       function of the squared scaled distance ((x(j)-x(i))/t)^2,
%       minus 2*n
root2=1.41421356;
n=length(x);
rowSums=zeros(size(x));
for i=1:n
    % squared distances from x(i) to every sample point, in units of t
    q=(x-x(i)).^2/t^2;
    wideTerm=exp(-q/4).*((q/2-1)/root2);
    narrowTerm=2*exp(-q/2).*(q-1);
    rowSums(i)=sum(wideTerm-narrowTerm);
end
fct=sum(rowSums)-2*n;
end
function [ambd]=scaling(x,h)
% Local scaling factors for the adaptive nonparametric kernel estimation.
%
% x    - n-element data vector
% h    - smoothing factor
% ambd - 1-by-n row vector of local scaling factors: the square root of
%        the ratio between the geometric mean of the pilot density values
%        and the pilot density at each point
n=length(x);
c=sqrt(2*pi);

% pilot estimate: fixed-width Gaussian kernel density at every sample point
pilot=zeros(1,n);
for k=1:n
    kern=exp(-0.5*((x(k)-x)/h).^2);
    pilot(k)=sum(kern)/c/n/h;
end

geoMean=exp(mean(log(pilot)));
ambd=sqrt(geoMean./pilot);
end
function [eps]=magn_accur(M)
% Round-off interval length of the magnitude data.
%
% M   - vector of magnitudes
% eps - smallest non-zero difference between sorted magnitudes,
%       capped at 0.1
sorted=sort(M);
gaps=sorted(2:end)-sorted(1:end-1);
positiveGaps=gaps(gaps>0);
eps=min(positiveGaps);
if eps>0.1
    eps=0.1;
end
end
function [Mmax,ierr]=Mmaxest(x,h,Mmin)
% Upper bound of the magnitude distribution from the nonparametric CDF.
%
% x    - vector of magnitudes (basic sample)
% h    - smoothing factor
% Mmin - lower bound used in the kernel CDF normalization
%
% Mmax - upper bound estimate
% ierr - 0 if the iterative procedure converged, 1 when the fallback
%        estimate 2*(largest) - (second largest) is returned instead
%
% Uses 'moja_calka' to integrate 'dystryb'.
x=sort(x);
n=length(x);
largest=x(n);

% kernel CDF contributions at the lower bound (constant over iterations)
d=normcdf((Mmin-x)./h);

ierr=1;
previous=largest;
for iter=1:50
    % normalization of the CDF on [Mmin, previous]
    mian=sum(normcdf((previous-x)./h)-d);
    Mmax=largest+moja_calka(@dystryb,x(1),previous,0.00001,h,mian,x,d);
    if abs(Mmax-previous)<0.01
        ierr=0;
        break;
    end
    previous=Mmax;
end

% no convergence, or an estimate above 9: use the simplified formula
if ierr==1 || Mmax>9
    Mmax=2*largest-x(n-1);
    ierr=1;
end
end
function [y]=dystryb(z,h,mian,x,d)
% Integrand used by Mmaxest: normalized kernel CDF raised to the power n.
%
% z    - evaluation points
% h    - smoothing factor
% mian - normalizing denominator of the CDF
% x    - data sample; n=length(x) is the exponent
% d    - per-sample offsets subtracted from the kernel CDF terms
%
% y    - row vector with one value per element of z
n=length(x);
npts=length(z);
for k=1:npts
    kernelCdf=normcdf((z(k)-x)./h);
    y(k)=(sum(kernelCdf-d)/mian)^n;
end
end
function [calka,ier]=moja_calka(funfc,a,b,eps,varargin)
% Adaptive Gauss-Legendre integration (8- and 16-point rules), adapted
% from the CERN library.
%
% funfc    - function to integrate (anything accepted by feval); it is
%            called with a scalar abscissa followed by varargin{:}
% a,b      - integration limits
% eps      - accuracy: a sub-interval is accepted when the 8- and 16-point
%            results differ by at most eps*(1+abs(16-point result))
% varargin - extra arguments forwarded to funfc
%
% calka    - integral (0 when the procedure fails)
% ier      - 0: convergence, 1: no convergence (the sub-interval length
%            fell below 1E-12*abs(a-b) without reaching the accuracy)
%
% The interval is swept from a to b. Whenever the two rules disagree on
% the current sub-interval, its length is halved and the test repeated.
%
% FIX: the accumulated integral used to be reset to zero on every halving,
% which discarded all sub-intervals already integrated; it is now reset
% only when the procedure gives up. The needless 'persistent' declaration
% of the tables (they were reassigned on each call) has been dropped.

% Weights and abscissae: entries 1-4 form the 8-point rule, entries 5-12
% the 16-point rule (positive half, the rules are symmetric).
W=[0.101228536290376 0.222381034453374 0.313706645877887 ...
   0.362683783378362 0.027152459411754 0.062253523938648 ...
   0.095158511682493 0.124628971255534 0.149595988816577 ...
   0.169156519395003 0.182603415044924 0.189450610455069];
X=[0.960289856497536 0.796666477413627 0.525532409916329 ...
   0.183434642495650 0.989400934991650 0.944575023073233 ...
   0.865631202387832 0.755404408355003 0.617876244402644 ...
   0.458016777657227 0.281603550779259 0.095012509837637];
CONST=1E-12;

delta=CONST*abs(a-b); % smallest sub-interval length that is still tried
calka=0.;
ier=0;
aa=a;     % left end of the current sub-interval
y=b-aa;   % length of the current sub-interval
while abs(y)>delta
    bb=aa+y;
    c1=0.5*(aa+bb); % midpoint
    c2=c1-aa;       % half-length

    s8=0.;
    for i=1:4
        u=X(i)*c2;
        s8=s8+W(i)*(feval(funfc,c1+u,varargin{:})+feval(funfc,c1-u,varargin{:}));
    end
    s16=0.;
    for i=5:12
        u=X(i)*c2;
        s16=s16+W(i)*(feval(funfc,c1+u,varargin{:})+feval(funfc,c1-u,varargin{:}));
    end
    s8=s8*c2;
    s16=s16*c2;

    if abs(s16-s8)>eps*(1+abs(s16))
        % not accurate enough: retry on the left half of the sub-interval
        y=0.5*y;
        if abs(y)<=delta
            % cannot refine any further: report failure
            calka=0.;
            ier=1;
        end
    else
        % accept the sub-interval and continue with the rest up to b
        calka=calka+s16;
        aa=bb;
        y=b-aa;
    end
end
end
% --------------------- Mmax BIAS estimation routine ---------------------- K 08NOV2019
function [BIAS,SD]=Mmax_Bias_GR(t,m,Mc,Mmax1,err,h,xx,ambd,synth)
% Bias and standard deviation of the Mmax estimate from synthetic catalogs.
%
% 'synth' synthetic magnitude samples are drawn by inverting the CDF
% returned by dist_NPT; for each sample an Mmax estimate (Mme) is obtained
% by an iteration that alternates between Mme and a b-type parameter b1.
%
% t      - occurrence times (not used in this routine)
% m      - magnitudes of the real catalog
% Mc     - completeness magnitude
% Mmax1  - Mmax estimated for the real catalog
% err    - convergence flag of that estimate; a warning is issued if ~=0
% h      - smoothing factor, forwarded to dist_NPT
% xx     - background sample, forwarded to dist_NPT
% ambd   - adaptive kernel weights, forwarded to dist_NPT
% synth  - number of synthetic datasets
%
% BIAS   - mean of (max(m) - Mme) over the synthetic datasets
% SD     - standard deviation of (max(m) - Mme)
%
% NOTE(review): dist_NPT is not defined in this file; it is assumed to
% return a magnitude grid 'mag' with the matching PDF and CDF columns in a
% form accepted by interp1q - confirm against its source.
% NOTE(review): 'range' is presumably the Statistics Toolbox function -
% confirm it is available where this runs.

% EPS: smallest difference between distinct magnitude values of the catalog
s1=sort(unique(m));s2=s1(2:length(s1))-s1(1:length(s1)-1);EPS=min(s2);
if err~=0
warning('process did not converge!!')
end
% MAXm: largest observed magnitude; N: number of events >=Mc.
% DeltaM is computed but not used below.
MAXm=max(m);N=numel(m(m>=Mc));DeltaM=MAXm-Mc; %beta=b*log(10);
[mag,PDF_NPT,CDF_NPT]=dist_NPT(Mc-EPS,Mmax1+EPS,0.01,Mc,EPS,h,xx,ambd,Mmax1);
for j=1:synth %set number of synthetic datasets, default is 10000
% % CDF:
% j
% linear interpolation to assign magnitude values from a uniform distribution sample
iM=rand(1,N);M1=interp1q(CDF_NPT,mag,iM');
% br(j) and DM are diagnostics only; they do not enter BIAS or SD
br(j)=1/(log(10)*(mean(M1)-min(M1)+EPS/2));DM=range(M1);
% largest magnitude of the synthetic sample
Mmax=max(M1);
% Iteration Process to estimate b and Mmax
% 'best' collects the successive b1 values; the loop stops once any two
% consecutive entries differ by no more than 0.00001
b1=10;best=[1.0 10.0];i=1;
while min(abs(diff(best)))>0.00001
w=exp(b1*(Mmax-Mc));E1=expint(N/(w-1));E2=expint(N*w/(w-1));
%E=expint(w);
Mme=Mmax+(E1-E2)/(b1*exp(-N/(w-1)))+(Mc)*exp(-N); %Mme=round(Mme/EPS)*EPS;
% if the formula yields NaN, fall back to
% 2*(largest) - (second largest) distinct value of the synthetic sample
if isnan(Mme)
KM=sort(unique(M1),'descend');
Mme=2*KM(1)-KM(2);
end
% equation solved for the updated b1 given the current Mme
fun=@(bb) 1/bb+(Mme-Mc)/(1-exp(bb*(Mme-Mc)))-mean(M1)+Mc-EPS/2; %consider the last 0.05 term
b1=fzero(fun,1);best(i)=b1;i=i+1;
% give up once the counter i reaches 50
if i==50
warning('process did not converge!!');break
end
end
% be, dm and Mm are stored for inspection only; just Me is used below
be(j)=b1/log(10);
Me(j)=Mme;dm(j)=DM;Mm(j)=Mmax;
end
BIAS=mean(MAXm-Me);
SD=std(MAXm-Me);
%b-mean(be) %check b-value difference
%histogram(be)
% MAXm: maximum magnitude in the real catalog
% Mmax: maximum magnitudes observed in the synthetic catalogs (rounded)
% Me: maximum magnitude estimates for the synthetic catalogs
% Mmax1: maximum magnitude estimated by GRT
end