From 46ff6b8a6c805a950eb8ff8e6254fe1d650bcd04 Mon Sep 17 00:00:00 2001 From: ftong <95+ftong@noreply.example.org> Date: Fri, 19 Jun 2026 14:29:44 +0200 Subject: [PATCH] Update src/seismic_hazard_forecasting.py use new activity rate forecast method --- src/seismic_hazard_forecasting.py | 314 +++++++++++++++++++++++++++++++++++--- 1 file changed, 289 insertions(+), 25 deletions(-) diff --git a/src/seismic_hazard_forecasting.py b/src/seismic_hazard_forecasting.py index 3602e52..369ed95 100644 --- a/src/seismic_hazard_forecasting.py +++ b/src/seismic_hazard_forecasting.py @@ -1,9 +1,246 @@ # -*- coding: utf-8 -*- +from eqdist.rate import datenum_to_datetime +import Rbeast as rb; +from scipy.stats import bootstrap +from matplotlib.dates import DateFormatter, AutoDateLocator + +def plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries, + bin_dur, unit, multiplicator, + rate_forecast, rate_unc_high, rate_unc_low, + datenum_data, mag_data): + + end_date = bin_edges[-1] + + fig, ax = plt.subplots(figsize=(14, 6)) + ax.plot(bin_edges_dt[1:], act_rate, '-o', linewidth=2.5, markersize=6.5, label='Activity rate') + + if rate_forecast is not None: + next_date = end_date + (bin_dur / multiplicator) + ax.plot(datenum_to_datetime(next_date), rate_forecast, + 'ro', label='Forecasted Rate', markersize=6.5) + ax.plot([bin_edges_dt[-1], datenum_to_datetime(next_date)], [act_rate[-1], rate_forecast], 'r-', linewidth=2.5) + ax.vlines(datenum_to_datetime(next_date), rate_unc_low, rate_unc_high, colors='r', + linewidth=2, label='Bootstrap uncertainty') + + ax.xaxis.set_major_locator(AutoDateLocator()) + ax.xaxis.set_major_formatter(DateFormatter('%d-%b-%Y')) + plt.xticks(rotation=45) + plt.title(f'Activity rate (Time Unit: {unit}, Bin Duration: {bin_dur} {unit})',fontsize=18) + # plt.title(f'Activity rate (Bin Duration: {bin_dur} {unit})',fontsize=18) + plt.xlabel('Time (Bin Center Date)', fontsize=16) + ax.set_ylabel('Activity rate per selected time period',fontsize=16) + plt.grid(True) + + if len(rt) > 0: + for i in range(len(rt)): + ax.plot(bin_edges_dt[1:][boundaries[i]:boundaries[i+1]], + [rt[i]] * (boundaries[i+1] - boundaries[i]), + linewidth=2, label=f'Rate period {i+1}') + + # ---- Magnitude scatter on right y-axis ---- + ax2 = ax.twinx() + event_dates = [datenum_to_datetime(d) for d in datenum_data] + + #-------------extract magnitude bins from data--------------------- + mags = np.array(mag_data) + min_mag = mags.min() + max_mag = mags.max() + + low_thresh = int(np.floor(min_mag)) + high_thresh = int(np.floor(max_mag)) + + thresholds = list(range(low_thresh, high_thresh + 1)) + + base_size = 15 + size_step = 35 + + bins_def = [] + for idx, t in enumerate(thresholds): + low = t + if idx < len(thresholds) - 1: + high = thresholds[idx + 1] + label = f'{low:.1f} \u2264 M < {high:.1f}' + else: + high = np.inf + label = f'M \u2265 {low:.1f}' + size = base_size + idx * size_step + bins_def.append((low, high, size, label)) + + for low, high, size, label in bins_def: + mask = (mags >= low) & (mags < high) + if np.any(mask): + sel_dates = [d for d, m in zip(event_dates, mask) if m] + sel_mags = mags[mask] + ax2.scatter(sel_dates, sel_mags, s=size, + facecolor='purple', edgecolor='black', + alpha=0.15, linewidth=1, label=label) + + ax2.set_ylabel('Magnitude', color='purple',fontsize=16) + ax2.yaxis.set_major_locator(MultipleLocator(0.5)) + ax2.yaxis.set_minor_locator(MultipleLocator(0.1)) + ax2.spines['right'].set_color('purple') + ax2.tick_params(axis='y', colors='purple') + + h1, l1 = ax.get_legend_handles_labels() + h2, l2 = ax2.get_legend_handles_labels() + handles = h1 + h2 + labels = l1 + l2 + n_legend = len(handles) + + ncols = max(1, int(np.ceil(n_legend / 5))) # ~5 entries per column + + #-------add 20% headroom above the data to make space for legend------ + ymin, ymax = ax.get_ylim() + ax.set_ylim(ymin, ymax * 1.20) + + ax.legend(handles, labels, loc='best', + ncol=ncols, borderaxespad=0,framealpha=0.7) + + ax.set_zorder(ax2.get_zorder() + 1) # put scatter plot behind the line plot + ax.patch.set_visible(False) + + fig.tight_layout() + plt.show() + +def bootstrap_forecast(data): + + window_data=data + + if len(window_data) >= 5: + res95 = bootstrap((window_data,), np.mean, confidence_level=0.95, + method='BCa', n_resamples=1000) + else: + res95 = bootstrap((window_data,), np.mean, confidence_level=0.95, + method='BCa', n_resamples=int(len(window_data) ** len(window_data))) + + forecast = np.mean(res95.bootstrap_distribution) + bca_conf95 = res95.confidence_interval + return forecast, bca_conf95 + +def calc_rates(act_rate, cps): + """ + Calculates mean activity rates between changepoints. + cps : sorted array of changepoint indices into act_rate + Returns rt (list of rates) and segment boundaries + """ + boundaries = [0] + list(cps.astype(int)) + [len(act_rate)] + rt = [np.mean(act_rate[boundaries[i]:boundaries[i+1]]) + for i in range(len(boundaries)-1)] + return rt, boundaries + +def apply_beast(act_rate, **kwargs): + """ + Applies BEAST to the smmothed rate data using different smoothing windows. + Input + act_rate : The activity rate data array to smooth and apply BEAST. + Output + out : A list of BEAST results for each smoothed rate array. + prob : A list of probabilities and change points extracted from BEAST results. + """ + + mirror_len = int(np.ceil(0.20 * len(act_rate))) + left_mirror = act_rate[:mirror_len][::-1] + right_mirror = act_rate[-mirror_len:][::-1] + act_rate_mirrored = np.concatenate([left_mirror, act_rate, right_mirror]) + + mcmc_th = int(np.clip(np.ceil(len(act_rate) / 100), 2, 15)) + beast_result = rb.beast(act_rate_mirrored, period=0, + tcp_minmax=[0, tcp_max], + torder_minmax=[torder_min, torder_max], + tseg_minlength=2, mcmc_chains=10, + mcmc_thin=mcmc_th, mcmc_seed=10) + + # User-driven ncp selection + if ncp_choice == 'median': + ncp = beast_result.trend.ncp_median + if np.isnan(ncp) or ncp == 0: + return beast_result, np.array([]) + elif ncp_choice == 'mode': + ncp = beast_result.trend.ncp_mode + if np.isnan(ncp) or ncp == 0: + return beast_result, np.array([]) + elif ncp_choice == 'pct90': + ncp = beast_result.trend.ncp_pct90 + if np.isnan(ncp) or ncp == 0: + return beast_result, np.array([]) + else: # default: median with mode and pct90 fallback + ncp = beast_result.trend.ncp_median + if np.isnan(ncp) or ncp == 0: + ncp = beast_result.trend.mode + if np.isnan(ncp) or ncp == 0: + ncp = beast_result.trend.ncp_pct90 + if np.isnan(ncp) or ncp == 0: + return beast_result, np.array([]) + + ncp = int(ncp) + cps = beast_result.trend.cp[:ncp] + + # Discard mirrored zone changepoints and correct indices + valid_mask = (cps > mirror_len) & (cps <= mirror_len + len(act_rate)) + cps = cps[valid_mask] - mirror_len + + return beast_result, np.sort(cps) + +def bins_and_beast(dates, unit, bin_dur, multiplicator, **kwargs): + start_date = dates.min() + end_date = dates.max() + + valid_units = ['hours', 'days'] + if unit not in valid_units: + unit = 'days' + bin_dur = 15 + if (end_date - start_date) < 15 and unit == 'days': + unit = 'hours' + bin_dur = 12 + + bin_edges = [end_date] + while bin_edges[-1] > start_date: + bin_edges.append(bin_edges[-1] - (bin_dur / multiplicator)) + bin_edges = bin_edges[::-1] + + #-------Drop first bin or keep it if >80% of set duration------ + first_width_days = bin_edges[1] - start_date + first_width_units = first_width_days * multiplicator + + if first_width_units >= 0.8 * bin_dur: + bin_edges[0] = start_date # edge of first bin is at data start + else: + bin_edges = bin_edges[1:] # drop bin 0 (and its events) + + #------------Error if remaining bins are fewer than 2------------ + if len(bin_edges) < 2: + raise ValueError( + f"Not enough data to form at least one full bin of duration " + f"{bin_dur} {unit}(s) after dropping the partial first bin " + f"({first_width_units:.2f} {unit}(s), below the 80% threshold). " + f"Try a shorter bin_dur or check your input data range." + ) + + bin_edges_dt = [datenum_to_datetime(d) for d in bin_edges] + bin_counts, _ = np.histogram(dates, bins=bin_edges) + + act_rate = [count / ((bin_edges[i + 1] - bin_edges[i]) * multiplicator / bin_dur) + for i, count in enumerate(bin_counts)] + + out, cps = apply_beast(act_rate, **kwargs) + + if len(cps) > 0: + rt, boundaries = calc_rates(act_rate, cps) + print(f'Changepoints detected at bins: {cps}') + else: + rt = [] + boundaries = [] + print('-----------------------------------------------------') + print('No changepoints detected by BEAST (Zhao et al., 2019)') + print('-----------------------------------------------------') + + return act_rate, bin_counts, bin_edges, bin_edges_dt, out, cps, rt, boundaries, bin_dur, unit def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max, m_kde_method, xy_select, grid_dim, xy_win_method, rate_select, time_win_duration, - forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose): + forecast_select, custom_rate, forecast_len, time_unit, model, products_string, verbose): +# forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose): """ Python application that reads an earthquake catalog and performs seismic hazard forecasting. Arguments: @@ -93,8 +330,11 @@ def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max exclude_low_fxy = False # skip low probability areas of the map thresh_fxy = 1e-3 # minimum fxy value (location PDF) needed to do PGA estimation (to skip low probability areas); also should scale according to number of grid points - AOI_lat = np.array(AOI_extent[:2]) - AOI_lon = np.array(AOI_extent[2:]) +# AOI_lat = np.array(AOI_extent[:2]) +# AOI_lon = np.array(AOI_extent[2:]) + AOI_lat = np.array([51.48, 51.54]) + AOI_lon = np.array([16.15, 16.24]) + # log user selections logger.debug(f"User input files\n Catalog: {catalog_file}\n Mc: {mc_file}\n Mag_PDF: {pdf_file}\n Mag: {m_file}") @@ -344,7 +584,12 @@ verbose: {verbose}") elif rate_select: logger.info(f"Activity rate modeling selected") - + + ncp_choice = 'default' + tcp_max = 5 + torder_min = 0 + torder_max = 1 + time, mag_dummy, lat_dummy, lon_dummy, depth_dummy = read_mat_cat(catalog_file, output_datenum=True) datenum_data = time # REMEMBER THE DECIMAL DENOTES DAYS @@ -364,31 +609,50 @@ verbose: {verbose}") logger.error(msg) raise Exception(msg) - # Selects dates in datenum format and procceeds to forecast value - start_date = datenum_data[-1] - (2 * time_win_duration / multiplicator) - dates_calc = [date for date in datenum_data if start_date <= date <= datenum_data[-1]] - forecasts, bca_conf95, rate_mean = bootstrap_forecast_rolling(dates_calc, multiplicator) + #-----------data are sorted in case they were not----------------- + sorted_pairs = sorted(zip(datenum_data, mag_data), key=lambda x: x[0]) + datenum_data, mag_data = map(list, zip(*sorted_pairs)) - # FINAL VALUES OF RATE AND ITS UNCERTAINTY IN THE 5-95 PERCENTILE - unc_bca05 = [ci.low for ci in bca_conf95]; - unc_bca95 = [ci.high for ci in bca_conf95] - rate_unc_high = multiplicator / np.array(unc_bca05); - rate_unc_low = multiplicator / np.array(unc_bca95); - rate_forecast = multiplicator / np.median(forecasts) # [per time unit] + #------Forecasted rate is taken from BEAST or is equal to last value if no changepoints detected----- + if len(cps) > 0: + rate_forecast = rt[-1] + last_cp_bin = int(cps[-1]) + else: + rate_forecast = act_rate[-1] + last_cp_bin = len(act_rate) - 1 - # Plot of forecasted activity rate with previous binned activity rate - act_rate, bin_counts, bin_edges, out, pprs, rt, idx, u_e = calc_bins(np.array(datenum_data), time_unit, - time_win_duration, dates_calc, - rate_forecast, rate_unc_high, rate_unc_low, - multiplicator, quiet=True, figsize=(14,9)) + last_cp_datenum = bin_edges[last_cp_bin] + dates_calc = [date for date in datenum_data if last_cp_datenum <= date <= datenum_data[-1]] + interevent_times = np.diff(dates_calc) - # Assign probabilities - lambdas, lambdas_perc = lambda_probs(act_rate, dates_calc, bin_edges) - lambdas = np.array(lambdas, dtype='d') - lambdas_perc = np.array(lambdas_perc, dtype='d') + #------------Use BCa for uncertainty intervals----------------- + forecast, bca_conf95 = bootstrap_forecast(interevent_times) + rate_unc_high = bin_dur / (bca_conf95.low * multiplicator) + rate_unc_low = bin_dur / (bca_conf95.high * multiplicator) + + #----------------------Plot------------------------------------ + plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries, + bin_dur, time_unit, multiplicator, + rate_forecast, rate_unc_high, rate_unc_low, + datenum_data, mag_data) + + print("\n----------------- Forecast Summary -----------------") + print(f"Forecasted activity rate (next {bin_dur} {time_unit}(s)): {rate_forecast:.4f}") + print(f"95% BCa confidence interval: [{rate_unc_low:.4f}, {rate_unc_high:.4f}]") + print("------------------------------------------------------") - # print("Forecasted activity rates: ", lambdas, "events per", time_unit[:-1]) - logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit} with percentages {lambdas_perc}") + lambdas = np.array(rate_forecast/bin_dur, dtype='d') + lambdas_perc = np.array(1.0, dtype='d') + + + + + + + + + + logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit}") np.savetxt('activity_rate.csv', np.vstack((lambdas, lambdas_perc)).T, header="lambda, percentage", delimiter=',', fmt='%1.4f')