From eb144d1f33b937d775957e4e58d5c292a1ea2a14 Mon Sep 17 00:00:00 2001 From: Joanna Kocot Date: Mon, 2 Jun 2025 15:38:37 +0200 Subject: [PATCH] initial commit --- src/GmpeEqsCalcWrapper.m | 39 +++++++++++ src/createReport.m | 52 ++++++++++++++ src/createResidualsSummary.m | 54 +++++++++++++++ src/dataset_GMPE.m | 161 +++++++++++++++++++++++++++++++++++++++++++ src/porownaj_model.m | 27 ++++++++ src/step1_GMPE.m | 34 +++++++++ src/step2_GMPE.m | 115 +++++++++++++++++++++++++++++++ 7 files changed, 482 insertions(+) create mode 100644 src/GmpeEqsCalcWrapper.m create mode 100644 src/createReport.m create mode 100644 src/createResidualsSummary.m create mode 100644 src/dataset_GMPE.m create mode 100644 src/porownaj_model.m create mode 100644 src/step1_GMPE.m create mode 100644 src/step2_GMPE.m diff --git a/src/GmpeEqsCalcWrapper.m b/src/GmpeEqsCalcWrapper.m new file mode 100644 index 0000000..b6805d7 --- /dev/null +++ b/src/GmpeEqsCalcWrapper.m @@ -0,0 +1,39 @@ +% +% ---------------- +% Copyright © 2019 ACK Cyfronet AGH, Poland. +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% This work was partially funded by EPOS-IP Project. +% ---------------- +% +function [dataset,model,report,residuals,comparison,SEE] = GmpeEqsCalcWrapper(JCatalog, event_key, output_set, event_set, dist_set, site_set,... + h, max_depth, prevModel) + + dataset = dataset_GMPE(JCatalog,event_key,output_set,event_set,dist_set,site_set,h); + if (h == 0) + [model, dataset, SEE] = step2_GMPE(dataset, max_depth); + else + model = step1_GMPE(dataset); + SEE = EmptyValue; + end + + if (~isempty(prevModel)) + comparison = porownaj_model(prevModel, model); + else + comparison = EmptyValue; + end + report = createReport(model,dataset); + residuals = createResidualsSummary(model); +end + diff --git a/src/createReport.m b/src/createReport.m new file mode 100644 index 0000000..e28560d --- /dev/null +++ b/src/createReport.m @@ -0,0 +1,52 @@ +function report=createReport(mdl, pp) + +% Produces report from GMPE calculation product (LinearModel) + +% (c) Dorota Olszewska 2017.03.10 + +report = struct(); +report.linRegModel=mdl.Formula.char; +report.h=pp.h +report.obsNo=mdl.NumObservations; +report.dfe=mdl.DFE; +report.rmse=mdl.RMSE; +report.r2=mdl.Rsquared.Ordinary; +a1=anova(mdl,'summary'); +report.p=a1{2,5}; +report.coefficients=mdl.Coefficients; +report.coeffsStatisticallyInsignificant=mdl.CoefficientNames(mdl.Coefficients.pValue(1:end)>0.05); +report.coeffsSourceNonPositive={}; +report.coeffsDistNonNegative={}; +if pp.N.eventN~=0 + if pp.N.eventN==2 + mm=(strcmp(mdl.PredictorNames,pp.event_set{1}))+(strcmp(mdl.PredictorNames,pp.event_set{2})); + elseif pp.N.eventN==1 + mm=(strcmp(mdl.PredictorNames,pp.event_set{1})); + end + + M.test=sum(mm); + M.Names=mdl.PredictorNames(mm==1); + M.var=mdl.Coefficients.Estimate(M.Names); + M.znak=mdl.Coefficients.Estimate(M.Names)>0; + if sum(M.znak)~=M.test + report.coeffsSourceNonPositive=M.Names(M.znak==0); + end +end + +if pp.N.distN~=0 + if pp.N.distN==2 + rr=(strcmp(mdl.PredictorNames,pp.dist_set{1}))+(strcmp(mdl.PredictorNames,pp.dist_set{2})); + elseif pp.N.distN==1 + rr=(strcmp(mdl.PredictorNames,pp.dist_set{1})); + end + + R.test=sum(rr); + R.Names=mdl.PredictorNames(rr==1); + R.var=mdl.Coefficients.Estimate(R.Names); + R.znak=mdl.Coefficients.Estimate(R.Names)<0; + R.exc=R.Names(R.znak==0); + if sum(R.znak)~=R.test + report.coeffsDistNonNegative=R.exc; + end +end +end diff --git a/src/createResidualsSummary.m b/src/createResidualsSummary.m new file mode 100644 index 0000000..a843f51 --- /dev/null +++ b/src/createResidualsSummary.m @@ -0,0 +1,54 @@ +% +% ---------------- +% Copyright © 2019 ACK Cyfronet AGH, Poland. +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% This work was partially funded by EPOS-IP Project. +% ---------------- +% +function summary = createResidualsSummary(mdl) + + summary = struct(); + summary.residuals = mdl.Residuals.Raw; + summary.stats = createResidualsStats(mdl); + summary.outlier = test_outlier(mdl); + +end + + +function stats = createResidualsStats(mdl) + + est=mdl.Fitted(mdl.ObservationInfo.Excluded==0); + obs=mdl.Variables.(mdl.ResponseName); + obs=obs(mdl.ObservationInfo.Excluded==0); + n=mdl.NumObservations; + mm=mean(mdl.Residuals.Raw(~isnan(mdl.Residuals.Raw),:)); + sd=std(mdl.Residuals.Raw(~isnan(mdl.Residuals.Raw),:)); + + m=LinearModel.fit(est,obs); + C=m.CoefficientCovariance; + a = []; + for i=1:n + a(i)=sqrt([1,est(i)]*C*[1,est(1)]'+m.MSE)*1.64; + end + + stats = struct(); + stats.residualsMean=mm; + stats.residualsStd=sd; + stats.estimated=est; + stats.observed=obs; + stats.fittedLine = m.Fitted; + stats.fittedLineA = a'; + +end diff --git a/src/dataset_GMPE.m b/src/dataset_GMPE.m new file mode 100644 index 0000000..5a0354b --- /dev/null +++ b/src/dataset_GMPE.m @@ -0,0 +1,161 @@ +function pp=dataset_GMPE(JCatalog,event_key,output_set,event_set,dist_set,site_set,h) +% function pp=dataset_GMPE(...) +% Prepares the dataset for calculation GMPE +% +% Input: +% JCatalog - join Catalog and GMCatalog +% event_key - unique values to recognise the same event +% output_set - value of ground motion +% event_set - parameters characterize the source +% dist_set - parameters characterize the path +% h - depth parameter +% site_set - parameters characterize the site +% all _set are cell array +% +%Output: +% pp - finele structure for GMPE calculation +% pp.data - dataset with data +% pp.event_key - cell array with unique values to recognise the same event +% pp.output_set - cell array with value of ground motion +% pp.event_set - cell array with parameters characterize the source +% pp.dist_set - cell array with parameters characterize the path +% pp.h - cell array with depth parameter +% pp.N - dataset with number of event (pp.N.eventN), distance (pp.N.distN) +% and site variable (pp.N.siteN), for no variable of them suitable pp.N.--N==0 +% +% +% (c) Dorota Olszewska 2017.03.10 +% ver2 2019.05.30 improvment of bugs when model without h + +%% Number and names of independent variable + +VarNames=event_key; +if length(event_set)==2 + eventN=2; + VarNames=[VarNames,event_set]; +elseif strcmp(event_set{1},'none')==0 + eventN=1; + VarNames=[VarNames,event_set]; +else + eventN=0; +end + +if length(dist_set)==2 + distN=2; + VarNames=[VarNames,dist_set]; +elseif strcmp(dist_set{1},'none')==0 + distN=1; + VarNames=[VarNames,dist_set]; +else + distN=0; +end + + +switch site_set{1} + case {'none'} + siteN=0; + case {'station'} + siteN=length(unique(JCatalog(strcmp('SID',{JCatalog.field})).val)); + siteName=unique(JCatalog(strcmp('SID',{JCatalog.field})).val); + VarNames=[VarNames,matlab.lang.makeValidName(strcat(cellstr(repmat('S_',siteN,1)),siteName))']; + %case {'Vs30'} +end + + +VarNames=[VarNames, output_set]; +N=eventN+distN+siteN; +n=length(JCatalog(1).val); +%% create dataset data for calculation of GMPE +% event_key, event_set, dist_set, site_set, output_set + +data=mat2dataset(zeros(n,N+2)); +data.Properties.VarNames=VarNames; + +%event_key variable +data.(event_key{1})=JCatalog(strcmp({JCatalog.field},event_key{1})).val; + +%event variable +if eventN==0 +else + if strfind(event_set{1},'M') + if length(event_set)==2 + data.(event_set{1})=JCatalog(strcmp({JCatalog.field},event_set{1})).val; + data.(event_set{2})=JCatalog(strcmp({JCatalog.field},event_set{1})).val.^2; + elseif length(event_set)==1 && isempty(strfind(event_set{1},'2')) + data.(event_set{1})=JCatalog(strcmp({JCatalog.field},event_set{1})).val; + elseif length(event_set)==1 && strfind(event_set{1},'2') + k=event_set{1}; + data.(event_set{1})=JCatalog(strcmp({JCatalog.field},k(:,1:end-1))).val.^2; + end + elseif strfind(event_set{1},'E') + if length(event_set)==2 + data.(event_set{1})=log10(JCatalog(strcmp({JCatalog.field},event_set{1})).val); + data.Properties.VarNames(strcmp(data.Properties.VarNames,event_set{1}))={['log',event_set{1}]}; + event_set{1}={['log',event_set{1}]}; + data.(event_set{2})=log10(JCatalog(strcmp({JCatalog.field},event_set{1})).val).^2; + data.Properties.VarNames(strcmp(data.Properties.VarNames,event_set{2}))={['log',event_set{2}]}; + event_set{2}={['log',event_set{2}]}; + elseif length(event_set)==1 && isempty(strfind(event_set{1},'2')) + data.(event_set{1})=log10(JCatalog(strcmp({JCatalog.field},event_set{1})).val); + data.Properties.VarNames(strcmp(data.Properties.VarNames,event_set{1}))={['log',event_set{1}]}; + event_set{1}={['log',event_set{1}]}; + elseif length(event_set)==1 && strfind(event_set{1},'2') + k=event_set{1}; + data.(event_set{1})=log10(JCatalog(strcmp({JCatalog.field},k(:,1:end-1))).val).^2; + data.Properties.VarNames(strcmp(data.Properties.VarNames,event_set{1}))={['log',event_set{1}]}; + event_set{1}={['log',event_set{1}]}; + end + end +end + +%distance variable +if distN==0 +else + if isempty(h) + if length(dist_set)==2 + data.(dist_set{1})=JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val; + data.(dist_set{2})=log10(JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val); + elseif length(dist_set)==1 && isempty(strfind(dist_set{1},'log')) + data.(dist_set{1})=JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val; + elseif length(dist_set)==1 && strfind(dist_set{1},'log') + data.(dist_set{1})=log10(JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val); + end + elseif length(h)==1 + if length(dist_set)==2 + data.(dist_set{1})=((JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val).^2+h.^2).^0.5; + data.(dist_set{2})=0.5.*log10((JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val).^2+h.^2); + elseif length(dist_set)==1 && isempty(strfind(dist_set{1},'log')) + a=(JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val); + data.(dist_set{1})=(a.^2+h.^2).^0.5; + elseif length(dist_set)==1 && ~isempty(strfind(dist_set{1},'log')) + data.(dist_set{1})=0.5.*log10((JCatalog(strcmp({JCatalog.field},'Epicentral_dist')).val).^2+h.^2); + end + end +end + +%site variable +switch site_set{1} + case {'station'} + for i=1:siteN + data.(matlab.lang.makeValidName(['S_',siteName{i}]))=strcmp(JCatalog(strcmp({JCatalog.field},'SID')).val,siteName{i}); + end + + +end + +%ground motion parameter + data.(output_set{1})=log10(JCatalog(strcmp({JCatalog.field},output_set{1})).val); + data.Properties.VarNames(strcmp(data.Properties.VarNames,output_set{1}))={['log',output_set{1}]}; + output_set{1}={['log',output_set{1}]}; +%% final structure pp + +pp.data=data; +pp.event_key=event_key; +pp.output_set=output_set; +pp.event_set=event_set; +pp.dist_set=dist_set; +pp.h=h; +pp.site_set=site_set; +pp.N=dataset(eventN,distN,siteN); + +end \ No newline at end of file diff --git a/src/porownaj_model.m b/src/porownaj_model.m new file mode 100644 index 0000000..2238df7 --- /dev/null +++ b/src/porownaj_model.m @@ -0,0 +1,27 @@ +function report=porownaj_model(mdl1,mdl2) + +% porownaj_model(....) +% Compares parameters of two estimated models of GMPE +% +% Input: +% mdl1 - the first GMPE model- output of linear model Matlab, +% mdl - the second GMPE model- output of linear model Matlab. +% +% Output: +% Displays the comparison (sign >,< or =) between: +% number of observations and coefficient, value of SEE, R2, F and p. +% +% (c) Dorota Olszewska 2016.03.10 +% 2017.08.02 - v1.2 - report is saved to text file + +report = struct(); +report.numObservations = [mdl1.NumObservations mdl2.NumObservations]; +report.numCoefficients = [mdl1.NumCoefficients mdl2.NumCoefficients]; +report.rmse = [mdl1.RMSE mdl2.RMSE]; +report.r2 = [mdl1.Rsquared.Ordinary mdl2.Rsquared.Ordinary]; +a1=anova(mdl1,'summary'); +a2=anova(mdl2,'summary'); +report.f = [a1{2,4} a2{2,4}]; +report.p = [a1{2,5} a2{2,5}]; + +end \ No newline at end of file diff --git a/src/step1_GMPE.m b/src/step1_GMPE.m new file mode 100644 index 0000000..6e7af12 --- /dev/null +++ b/src/step1_GMPE.m @@ -0,0 +1,34 @@ +function result=step1_GMPE(pp) + +% Step 1 of calculation the GMPE +% The model based on pp structure is calculated using linear regression +% method and statistical and physical correctness of model is checked. +% +% Input: +% pp - finele structure for GMPE calculation +% pp.data - dataset with data +% pp.event_key - cell array with unique values to recognise the same event +% pp.output_set - cell array with value of ground motion +% pp.event_set - cell array with parameters characterize the source +% pp.dist_set - cell array with parameters characterize the path +% pp.h - cell array with depth parameter +% pp.N - dataset with number of event (pp.N.eventN), distance (pp.N.distN) +% and site variable (pp.N.siteN), for no variable of them suitable +% pp.N.--N==0 +% +% Output: +% result = output of linear model Matlab function. +% +% (c) Dorota Olszewska 2017.03.10 +% 2017.08.02 - v1.2 - reports are saved to text files + +switch pp.site_set{1} + case {'none'} + mdl = LinearModel.fit(pp.data(:,2:end)); + case {'station'} + mdl = LinearModel.fit(pp.data(:,2:end),'Intercept',false); +end + +result=mdl; + +end \ No newline at end of file diff --git a/src/step2_GMPE.m b/src/step2_GMPE.m new file mode 100644 index 0000000..7e30849 --- /dev/null +++ b/src/step2_GMPE.m @@ -0,0 +1,115 @@ +function [Result3, pp1, SEE]=step2_GMPE(pp, max_depth_km) + +% Step 2 of calculation the GMPE - estimation of fix depth +% Calculates GMPE for given parameters and estimates h (fix depth). +% H is chosen when SEE is the smallest. +% +% Input: +% pp - finale structure for GMPE calculation +% pp.data - dataset with data +% pp.event_key - cell array with unique values to recognise the same event +% pp.output_set - cell array with value of ground motion +% pp.event_set - cell array with parameters characterize the source +% pp.dist_set - cell array with parameters characterize the path +% pp.h - cell array with depth parameter +% pp.N - dataset with number of event (pp.N.eventN), distance (pp.N.distN) +% and site variable (pp.N.siteN), for no variable of them suitable +% pp.N.--N==0 +% max_depth_km - maximum depth used in the h calculation (in km) +% +% Output: +% Result3 = output of linear model Matlab function with estimated h, +% pp1 - structure of dataset with new h, +% SEE - SEE for depth 1:max_depth [m]. +% +% (c) Dorota Olszewska 2017.03.10 +% update 2019.07.10 + + + +warning off; + +max_depth = max_depth_km * 1000 +pp0=pp; +switch pp.site_set{1} + case {'none'} + + if pp.N.distN==2 + for h=1:max_depth + pp0.data.R=pp.data.R.^2+(h./1000).^2; + pp0.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); + baza=[ones(length(pp0.data),1),double(pp0.data(:,2:end-1))]; + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'logR')) + for h=1:max_depth + pp0.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); + baza=[ones(length(pp0.data),1),double(pp0.data(:,2:end-1))]; + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'R')) + for h=1:max_depth + pp0.data.R=pp.data.R.^2+(h./1000).^2; + baza=[ones(length(pp0.data),1),double(pp0.data(:,2:end-1))]; + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + + end + + case {'station'} + + if pp.N.distN==2 + for h=1:max_depth + pp0.data.R=pp.data.R.^2+(h./1000).^2; + pp0.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); + baza=double(pp0.data(:,2:end-1)); + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + + elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'logR')) + for h=1:max_depth + pp0.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); + baza=double(pp0.data(:,2:end-1)); + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'R')) + for h=1:max_depth + pp0.data.R=pp.data.R.^2+(h./1000).^2; + baza=double(pp0.data(:,2:end-1)); + logGM=double(pp0.data(:,end)); + b=(baza'*baza)^(-1)*baza'*logGM; + SEE(h)=sqrt((sum((baza*b-logGM).^2))./(length(baza)-pp0.N.eventN-pp0.N.distN-pp0.N.siteN)); + end + end + +end + + + +h=find(SEE==min(SEE),1); +pp.h=h/1000; +SEE=[1:max_depth;SEE]'; +%model z prawidlowym h +if pp.N.distN==2 + pp.data.R=pp.data.R.^2+(h./1000).^2; + pp.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); +elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'logR')) + pp.data.logR=0.5.*log10((10.^pp.data.logR).^2+(h./1000).^2); +elseif pp.N.distN==1 && ~isempty(strfind(pp.dist_set{1},'R')) + pp.data.R=pp.data.R.^2+(h./1000).^2; +end + +pp1=pp; +Result3 = step1_GMPE(pp1); + +end