sera-applications/SHAPE_Package/SHAPE_ver2b.0/SSH/Nonpar_tr_O.m

%   [lamb_all,lamb,lamb_err,unit,eps,ierr,h,xx,ambd,Mmax,err,BIAS,SD]=
%           Nonpar_tr(t,M,iop,Mmin,Mmax,Nsynth)
%
% BASED ON MAGNITUDE SAMPLE DATA M DETERMINES THE ROUND-OFF INTERVAL LENGTH
% OF THE MAGNITUDE DATA - eps, THE SMOOTHING FACTOR - h, CONSTRUCTS
% THE BACKGROUND SAMPLE - xx, CALCULATES THE WEIGHTING FACTORS - amb, AND
% THE END-POINT OF MAGNITUDE DISTRIBUTION Mmax FOR A USE OF THE NONPARAMETRIC
% ADAPTATIVE KERNEL ESTIMATORS OF MAGNITUDE DISTRIBUTION UNDER THE
% ASSUMPTION OF THE EXISTENCE OF THE UPPER LIMIT OF MAGNITUDE DISTRIBUTION.
%
% !! THIS FUNCTION MUST BE EXECUTED AT START-UP OF THE UPPER-BOUNDED
%       NON-PARAMETRIC HAZARD ESTIMATION MODE !!
%
% AUTHOR: S. Lasocki ver 2  01/2015 within IS-EPOS project.
%
% DESCRIPTION: The kernel estimator approach is a model-free alternative
% to estimating the magnitude distribution functions. The smoothing factor
% h, is estimated using the least-squares cross-validation for the Gaussian
% kernel function. The final form of the kernel is the adaptive kernel.
% In order to avoid repetitions, which cannot appear in a sample when the
% kernel estimators are used, the magnitude sample data are randomized
% within the magnitude round-off interval. The round-off interval length -
% eps is the least non-zero difference between sample data or 0.1 is the
% least difference if greater than 0.1. The randomization is done
% assuming exponential distribution of m in [m0-eps/2, m0+eps/2], where m0
% is the sample data point and eps is the length of roud-off inteval. The
% shape parameter of the exponential distribution is estimated from the whole
% data sample assuming the exponential distribution. The background sample
% - xx comprises the randomized values of magnitude doubled symmetrically
% with respect to the value Mmin-eps/2: length(xx)=2*length(M). Weigthing
% factors row vector for the adaptive kernel is of the same size as xx.
% The mean activity rate, lamb, is the number of events >=Mmin into the
% length of the period in which they occurred.
% The upper limit of the distribution Mmax is evaluated using
% the Kijko-Sellevol generic formula. If convergence is not reached the
% Whitlock @ Robson simplified formula is used:
% Mmaxest= 2(max obs M) - (second max obs M)).
%
% See: the references below for a more comprehensive description.
%
% This is a beta version of the program. Further developments are foreseen.
%
% REFERENCES:
%Silverman B.W. (1986) Density Estimation for Statistics and Data Analysis,
%   Chapman and Hall, London
%Kijko A., Lasocki S., Graham G. (2001) Pure appl. geophys. 158, 1655-1665
%Lasocki S., Orlecka-Sikora B. (2008) Tectonophysics 456, 28-37
%Kijko, A., and M.A. Sellevoll (1989) Bull. Seismol. Soc. Am. 79, 3,645-654
%Lasocki, S., Urban, P. (2011) Acta Geophys 59, 659-673,
% doi: 10.2478/s11600-010-0049-y
%
% INPUT:
%   t - vector of earthquake occurrence times
%   M - vector of earthquake magnitudes (sample data)
%   iop - determines the used unit of time. iop=0 - 'day', iop=1 - 'month',
%       iop=2 - 'year'
%   Mmin - lower bound of the distribution - catalog completeness level
%   Mmax - upper limit of Magnitude Distribution. Can be set by user, or
%   estimate within the program - it then should be set as Mmax=[].
%
% OUTPUT
%   lamb_all - mean activity rate for all events
%   lamb - mean activity rate for events >= Mmin
%   lamb_err - error paramter on the number of events >=Mmin. lamb_err=0
%       for 50 or more events >=Mmin and the parameter estimation is
%       continued, lamb_err=1 otherwise, all output paramters except
%       lamb_all and lamb are set to zero and the function execution is
%       terminated.
%   unit - string with name of time unit used ('year' or 'month' or 'day').
%   eps - length of round-off interval of magnitudes.
%   ierr - h-convergence indicator. ierr=0 if the estimation procedure of
%   the optimal smoothing factor has converged (a zero of the h functional
%   has been found), ierr=1 when multiple zeros of h functional were
%   encountered - the largest h is accepted, ierr = 2 when h functional did
%   not zeroe - the approximate h value is taken.
%   h - kernel smoothing factor.
%   xx - the background sample for the nonparametric estimators of magnitude
%   distribution
%   ambd - the weigthing factors for the adaptive kernel
%   Mmax - upper limit of magnitude distribution
%   err - error parameter on Mmax estimation, err=0 - convergence, err=1 -
%   no converegence of Kijko-Sellevol estimator, Robinson @ Whitlock
%   method used.
%   BIAS - Mmax estimation Bias (Lasocki and Urban, 2011)
%   SD   - Mmax standard deviation (Lasocki ands Urban, 2011)
%
% LICENSE
%     This file is a part of the IS-EPOS e-PLATFORM.
%
%     This is free software: you can redistribute it and/or modify it under
%     the terms of the GNU General Public License as published by the Free
%     Software Foundation, either version 3 of the License, or
%     (at your option) any later version.
%
%     This program is distributed in the hope that it will be useful,
%     but WITHOUT ANY WARRANTY; without even the implied warranty of
%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%     GNU General Public License for more details.
%

function [lamb_all,lamb,lamb_err,unit,eps,ierr,h,xx,ambd,Mmax,err,BIAS,SD]=...
            Nonpar_tr_Ob(t,M,iop,Mmin,Mmax,Nsynth)
if nargin==5;Nsynth=[];end                                   % K08DEC2019
if isempty(t) || numel(t)<3 isempty(M(M>=Mmin)) %K03OCT
    t=[1 2];M=[1 2];   end   %K30SEP

lamb_err=0;
n=length(M);
t1=t(1);
%%% %%%%%%%%%%%%%MICHAL
xx=NaN;
ambd=NaN;
%%% %%%%%%%%%%%%%MICHAL
for i=1:n
    if M(i)>=Mmin; break; end
    t1=t(i+1);
end
    t2=t(n);
for i=n:1
    if M(i)>=Mmin; break; end
    t2=t(i-1);
end
nn=0;
for i=1:n
    if M(i)>=Mmin
        nn=nn+1;
    end
end
% SL 03MAR2015 ----------------------------------
[NM,unit]=time_diff(t(1),t(n),iop);
lamb_all=n/NM;
[NM,unit]=time_diff(t1,t2,iop);
lamb=nn/NM;
% SL 03MAR2015 ----------------------------------
if nn<50
    eps=0;ierr=0;h=0;Mmax=0;err=0;
    lamb_err=1;
    BIAS=NaN;SD=NaN;                                               %%%     K 08NOV2019
    return;
end

eps=magn_accur(M);
n=0;
for i=1:length(M)
    if M(i)>=Mmin;
        n=n+1;
        x(n)=M(i);
    end
end
x=sort(x)';
beta=1/(mean(x)-Mmin+eps/2);
[xx]=korekta(x,Mmin,eps,beta);
xx=sort(xx);
clear x;
xx = podwajanie(xx,Mmin-eps/2);
[h,ierr]=hopt(xx);
[ambd]=scaling(xx,h);

if isempty(Mmax)                         %K30AUG2019 - Allow for manually set Mmax
err_Mmax=1;[Mmax,err]=Mmaxest(xx,h,Mmin-eps/2);      %  err_Mmax added 04DEC2019
else
err_Mmax=0;err=0;                        %K30AUG2019
end


% Estimation of Mmax Bias                                               %%%   K 04DEC2019
% (Lasocki and Urban, 2011, doi:10.2478/s11600-010-0049-y)
if isempty(Nsynth)==0  && err_Mmax==1              % set number of synthetic datasets, e.g. 10000
[BIAS,SD]=Mmax_Bias_GR(t,M,Mmin,Mmax,err,h,xx,ambd,Nsynth);
elseif isempty(Nsynth)==0 && err_Mmax==0;
warning('Mmax must be empty for BIAS calculation');BIAS=[];SD=[];
else; BIAS=0;SD=0;
end

end

function [NM,unit]=time_diff(t1,t2,iop)              % SL 03MAR2015 ----------------------------------

% TIME DIFFERENCE BETWEEEN t1,t2 EXPRESSED IN DAY, MONTH OR YEAR UNIT
%
% t1 - start time (in MATLAB numerical format)
% t2 - end time (in MATLAB numerical format)  t2>=t1
%   iop - determines the used unit of time. iop=0 - 'day', iop=1 - 'month',
%       iop=2 - 'year'
%
%   NM - number of time units from t1 to t2
%   unit - string with name of time unit used ('year' or 'month' or 'day').

if iop==0
    NM=(t2-t1);
    unit='day';
elseif iop==1
    V1=datevec(t1);
    V2=datevec(t2);
    NM=V2(3)/eomday(V2(1),V2(2))+V2(2)+12-V1(2)-V1(3)/eomday(V1(1),V1(2))...
        +(V2(1)-V1(1)-1)*12;
    unit='month';
else
    V1=datevec(t1);
    V2=datevec(t2);
    NM2=V2(3);
    if V2(2)>1
        for k=1:V2(2)-1
            NM2=NM2+eomday(V2(1),k);
        end
    end
    day2=365; if eomday(V2(1),2)==29; day2=366; end;
    NM2=NM2/day2;
    NM1=V1(3);
    if V1(2)>1
        for k=1:V1(2)-1
            NM1=NM1+eomday(V1(1),k);
        end
    end
    day1=365; if eomday(V1(1),2)==29; day1=366; end;
    NM1=(day1-NM1)/day1;
    NM=NM2+NM1+V2(1)-V1(1)-1;
    unit='year';
end
end

function [m_corr]=korekta(m,Mmin,eps,beta)

% RANDOMIZATION OF MAGNITUDE WITHIN THE ACCURACY INTERVAL
%
% m - input vector of magnitudes
% Mmin - catalog completeness level
% eps - accuracy of magnitude
% beta - the parameter of the unbounded exponential distribution
%
% m_corr - vector of randomized magnitudes
%
F1=1-exp(-beta*(m-Mmin-0.5*eps));
F2=1-exp(-beta*(m-Mmin+0.5*eps));
u=rand(size(m));
w=u.*(F2-F1)+F1;
m_corr=Mmin-log(1-w)./beta;
end

function x2 = podwajanie(x,x0)

% DOUBLES THE SAMPLE

% If the sample x(i) is is truncated from the left hand side and belongs
% to the interval [x0,inf) or it is truncated from the right hand side and
% belongs to the interval (-inf,x0]
%   then the doubled sample is [-x(i)+2x0,x(i)]
% x - is the column data vector
% x2 - is the column vector of data doubled and sorted in the ascending
% order

x2=[-x+2*x0
    x];
x2=sort(x2);
end

function [h,ierr]=hopt(x)

%Estimation of the optimal smoothing factor by means of the least squares
%method
% x - column data vector
% The result is an optimal smoothing factor
% ierr=0 - convergence, ierr=1 - multiple h, ierr=2 - approximate h is used
% The function calls the procedure FZERO for the function 'funct'
% NEW VERSION 2 - without a square matrix. Also equipped with extra zeros
% search

% MODIFIED JUNE 2014

ierr=0;
n=length(x);
x=sort(x);
interval=[0.000001 2*std(x)/n^0.2];
x1=funct(interval(1),x);
x2=funct(interval(2),x);
if x1*x2<0
   fun = @(t) funct(t,x);                                       % for octave
	x0 =interval;                                                   % for octave
    [hh(1),fval,exitflag] = fzero(fun,x0);              % for octave

% Extra zeros search
    jj=1;
    for kk=2:7
        interval(1)=1.1*hh(jj);
        interval(2)=interval(1)+(kk-1)*hh(jj);
        x1=funct(interval(1),x);
        x2=funct(interval(2),x);
        if x1*x2<0
            jj=jj+1;
                fun = @(t) funct(t,x);                          % for octave
				x0 =interval;                                       % for octave
				[hh(jj),fval,exitflag] = fzero(fun,x0);   % for octave
        end
    end
    if jj>1;ierr=1;end
    h=max(hh);

   if exitflag==1;return;end

end
h=0.891836*(mean(x)-x(1))/(n^0.2);
ierr=2;
end

function [fct]=funct(t,x)
p2=1.41421356;
n=length(x);
yy=zeros(size(x));
for i=1:n,
   xij=(x-x(i)).^2/t^2;
   y=exp(-xij/4).*((xij/2-1)/p2)-2*exp(-xij/2).*(xij-1);
   yy(i)=sum(y);
end;
fct=sum(yy)-2*n;
clear xij y yy;
end


function [ambd]=scaling(x,h)

% EVALUATES A VECTOR OF SCALING FACTORS FOR THE NONPARAMETRIC ADAPTATIVE
% ESTIMATION

% x - the n dimensional column vector of data values sorted in the ascending
% order
% h - the optimal smoothing factor
% ambd - the resultant n dimensional row vector of local scaling factors

n=length(x);
c=sqrt(2*pi);
gau=zeros(1,n);
for i=1:n,
   gau(i)=sum(exp(-0.5*((x(i)-x)/h).^2))/c/n/h;
end
g=exp(mean(log(gau)));
ambd=sqrt(g./gau);
end

function [eps]=magn_accur(M)
x=sort(M);
d=x(2:length(x))-x(1:length(x)-1);
eps=min(d(d>0));
if eps>0.1; eps=0.1;end
end

function [Mmax,ierr]=Mmaxest(x,h,Mmin)

% ESTIMATION OF UPPER BOUND USING NONPARAMETRIC DISTRIBUTION FUNCTIONS
% x - row vector of magnitudes (basic sample).
% h - optimal smoothing factor
% Mmax - upper bound
% ierr=0 if basic procedure converges, ierr=1 when Robsen & Whitlock Mmas
% estimation

% Uses function 'dystryb'

n=length(x);
ierr=1;
x=sort(x);
Mmax1=x(n);
for i=1:50,
d=normcdf((Mmin-x)./h);
mian=sum(normcdf((Mmax1-x)./h)-d);
Mmax=x(n)+moja_calka(@dystryb,x(1),Mmax1,0.00001,h,mian,x,d);
if abs(Mmax-Mmax1)<0.01
   ierr=0;break;
end
Mmax1=Mmax;
end
if (ierr==1 || Mmax>9)
    Mmax=2*x(n)-x(n-1);
    ierr=1;
end
end

function [y]=dystryb(z,h,mian,x,d)
n=length(x);
m=length(z);
for i=1:m,
t=(z(i)-x)./h;
t=normcdf(t);
yy=sum(t-d);
y(i)=(yy/mian)^n;
end
end

function [calka,ier]=moja_calka(funfc,a,b,eps,varargin)

% Integration by means of 16th poit Gauss method. Adopted from CERNLIBRARY

% funfc - string with the name of function to be integrated
% a,b - integration limits
% eps - accurracy
% varargin - other parameters of function to be integrated
% calka - integral
% ier=0 - convergence, ier=1 - no conbergence

persistent W X CONST
W=[0.101228536290376 0.222381034453374 0.313706645877887 ...
0.362683783378362 0.027152459411754 0.062253523938648 ...
0.095158511682493 0.124628971255534 0.149595988816577 ...
0.169156519395003 0.182603415044924 0.189450610455069];
X=[0.960289856497536 0.796666477413627 0.525532409916329 ...
0.183434642495650 0.989400934991650 0.944575023073233 ...
0.865631202387832 0.755404408355003 0.617876244402644 ...
0.458016777657227 0.281603550779259 0.095012509837637];
CONST=1E-12;
delta=CONST*abs(a-b);
calka=0.;
aa=a;
y=b-aa;
ier=0;
while abs(y)>delta,
   bb=aa+y;
   c1=0.5*(aa+bb);
   c2=c1-aa;
   s8=0.;
   s16=0.;
   for i=1:4,
      u=X(i)*c2;
      s8=s8+W(i)*(feval(funfc,c1+u,varargin{:})+feval(funfc,c1-u,varargin{:}));
   end
   for i=5:12,
      u=X(i)*c2;
      s16=s16+W(i)*(feval(funfc,c1+u,varargin{:})+feval(funfc,c1-u,varargin{:}));
   end
   s8=s8*c2;
   s16=s16*c2;
   if abs(s16-s8)>eps*(1+abs(s16))
      y=0.5*y;
      calka=0.;
      ier=1;
   else
      calka=calka+s16;
      aa=bb;
      y=b-aa;
      ier=0;
   end
end
end

% --------------------- Mmax BIAS estimation routine ----------------------  K 08NOV2019

function [BIAS,SD]=Mmax_Bias_GR(t,m,Mc,Mmax1,err,h,xx,ambd,synth)

s1=sort(unique(m));s2=s1(2:length(s1))-s1(1:length(s1)-1);EPS=min(s2);

if err~=0
warning('process did not converge!!')
end

MAXm=max(m);N=numel(m(m>=Mc));DeltaM=MAXm-Mc;    %beta=b*log(10);
[mag,PDF_NPT,CDF_NPT]=dist_NPT(Mc-EPS,Mmax1+EPS,0.01,Mc,EPS,h,xx,ambd,Mmax1);

for j=1:synth         %set number of synthetic datasets, default is 10000
% % CDF:

                                                                                     % j

% linear interpolation to assign magnitude values from a uniform distribution sample
 iM=rand(1,N);M1=interp1q(CDF_NPT,mag,iM');
 br(j)=1/(log(10)*(mean(M1)-min(M1)+EPS/2));DM=range(M1);
 Mmax=max(M1);

 % Iteration Process to estimate b and Mmax
b1=10;best=[1.0 10.0];i=1;
while min(abs(diff(best)))>0.00001
w=exp(b1*(Mmax-Mc));E1=expint(N/(w-1));E2=expint(N*w/(w-1));
%E=expint(w);
Mme=Mmax+(E1-E2)/(b1*exp(-N/(w-1)))+(Mc)*exp(-N);    %Mme=round(Mme/EPS)*EPS;

if isnan(Mme)
KM=sort(unique(M1),'descend');
Mme=2*KM(1)-KM(2);
end

fun=@(bb) 1/bb+(Mme-Mc)/(1-exp(bb*(Mme-Mc)))-mean(M1)+Mc-EPS/2;  %consider th5 last 0.05 term
b1=fzero(fun,1);best(i)=b1;i=i+1;

if i==50
   warning('process did not converge!!');break
end

end

be(j)=b1/log(10);
Me(j)=Mme;dm(j)=DM;Mm(j)=Mmax;

end

BIAS=mean(MAXm-Me);
SD=std(MAXm-Me);

%b-mean(be)    %check b-value difference
%histogram(be)

% MAXm: maximum magnitude in the real catalog
% Mmax: maximum magnitudes observed in the synthetic catalogs (rounded)
% Me: maximum magnitude estimates for the synthetic catalogs
% Mmax1: maximum magnitude estimated by GRT
end