Update src/seismic_hazard_forecasting.py

use new activity rate forecast method
This commit is contained in:
2026-06-19 14:29:44 +02:00
parent 7e89f75c84
commit 46ff6b8a6c
+287 -23
View File
@@ -1,9 +1,246 @@
# -*- coding: utf-8 -*-
from eqdist.rate import datenum_to_datetime
import Rbeast as rb;
from scipy.stats import bootstrap
from matplotlib.dates import DateFormatter, AutoDateLocator
def plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
bin_dur, unit, multiplicator,
rate_forecast, rate_unc_high, rate_unc_low,
datenum_data, mag_data):
end_date = bin_edges[-1]
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(bin_edges_dt[1:], act_rate, '-o', linewidth=2.5, markersize=6.5, label='Activity rate')
if rate_forecast is not None:
next_date = end_date + (bin_dur / multiplicator)
ax.plot(datenum_to_datetime(next_date), rate_forecast,
'ro', label='Forecasted Rate', markersize=6.5)
ax.plot([bin_edges_dt[-1], datenum_to_datetime(next_date)], [act_rate[-1], rate_forecast], 'r-', linewidth=2.5)
ax.vlines(datenum_to_datetime(next_date), rate_unc_low, rate_unc_high, colors='r',
linewidth=2, label='Bootstrap uncertainty')
ax.xaxis.set_major_locator(AutoDateLocator())
ax.xaxis.set_major_formatter(DateFormatter('%d-%b-%Y'))
plt.xticks(rotation=45)
plt.title(f'Activity rate (Time Unit: {unit}, Bin Duration: {bin_dur} {unit})',fontsize=18)
# plt.title(f'Activity rate (Bin Duration: {bin_dur} {unit})',fontsize=18)
plt.xlabel('Time (Bin Center Date)', fontsize=16)
ax.set_ylabel('Activity rate per selected time period',fontsize=16)
plt.grid(True)
if len(rt) > 0:
for i in range(len(rt)):
ax.plot(bin_edges_dt[1:][boundaries[i]:boundaries[i+1]],
[rt[i]] * (boundaries[i+1] - boundaries[i]),
linewidth=2, label=f'Rate period {i+1}')
# ---- Magnitude scatter on right y-axis ----
ax2 = ax.twinx()
event_dates = [datenum_to_datetime(d) for d in datenum_data]
#-------------extract magnitude bins from data---------------------
mags = np.array(mag_data)
min_mag = mags.min()
max_mag = mags.max()
low_thresh = int(np.floor(min_mag))
high_thresh = int(np.floor(max_mag))
thresholds = list(range(low_thresh, high_thresh + 1))
base_size = 15
size_step = 35
bins_def = []
for idx, t in enumerate(thresholds):
low = t
if idx < len(thresholds) - 1:
high = thresholds[idx + 1]
label = f'{low:.1f} \u2264 M < {high:.1f}'
else:
high = np.inf
label = f'M \u2265 {low:.1f}'
size = base_size + idx * size_step
bins_def.append((low, high, size, label))
for low, high, size, label in bins_def:
mask = (mags >= low) & (mags < high)
if np.any(mask):
sel_dates = [d for d, m in zip(event_dates, mask) if m]
sel_mags = mags[mask]
ax2.scatter(sel_dates, sel_mags, s=size,
facecolor='purple', edgecolor='black',
alpha=0.15, linewidth=1, label=label)
ax2.set_ylabel('Magnitude', color='purple',fontsize=16)
ax2.yaxis.set_major_locator(MultipleLocator(0.5))
ax2.yaxis.set_minor_locator(MultipleLocator(0.1))
ax2.spines['right'].set_color('purple')
ax2.tick_params(axis='y', colors='purple')
h1, l1 = ax.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
handles = h1 + h2
labels = l1 + l2
n_legend = len(handles)
ncols = max(1, int(np.ceil(n_legend / 5))) # ~5 entries per column
#-------add 20% headroom above the data to make space for legend------
ymin, ymax = ax.get_ylim()
ax.set_ylim(ymin, ymax * 1.20)
ax.legend(handles, labels, loc='best',
ncol=ncols, borderaxespad=0,framealpha=0.7)
ax.set_zorder(ax2.get_zorder() + 1) # put scatter plot behind the line plot
ax.patch.set_visible(False)
fig.tight_layout()
plt.show()
def bootstrap_forecast(data):
window_data=data
if len(window_data) >= 5:
res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
method='BCa', n_resamples=1000)
else:
res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
method='BCa', n_resamples=int(len(window_data) ** len(window_data)))
forecast = np.mean(res95.bootstrap_distribution)
bca_conf95 = res95.confidence_interval
return forecast, bca_conf95
def calc_rates(act_rate, cps):
"""
Calculates mean activity rates between changepoints.
cps : sorted array of changepoint indices into act_rate
Returns rt (list of rates) and segment boundaries
"""
boundaries = [0] + list(cps.astype(int)) + [len(act_rate)]
rt = [np.mean(act_rate[boundaries[i]:boundaries[i+1]])
for i in range(len(boundaries)-1)]
return rt, boundaries
def apply_beast(act_rate, **kwargs):
"""
Applies BEAST to the smmothed rate data using different smoothing windows.
Input
act_rate : The activity rate data array to smooth and apply BEAST.
Output
out : A list of BEAST results for each smoothed rate array.
prob : A list of probabilities and change points extracted from BEAST results.
"""
mirror_len = int(np.ceil(0.20 * len(act_rate)))
left_mirror = act_rate[:mirror_len][::-1]
right_mirror = act_rate[-mirror_len:][::-1]
act_rate_mirrored = np.concatenate([left_mirror, act_rate, right_mirror])
mcmc_th = int(np.clip(np.ceil(len(act_rate) / 100), 2, 15))
beast_result = rb.beast(act_rate_mirrored, period=0,
tcp_minmax=[0, tcp_max],
torder_minmax=[torder_min, torder_max],
tseg_minlength=2, mcmc_chains=10,
mcmc_thin=mcmc_th, mcmc_seed=10)
# User-driven ncp selection
if ncp_choice == 'median':
ncp = beast_result.trend.ncp_median
if np.isnan(ncp) or ncp == 0:
return beast_result, np.array([])
elif ncp_choice == 'mode':
ncp = beast_result.trend.ncp_mode
if np.isnan(ncp) or ncp == 0:
return beast_result, np.array([])
elif ncp_choice == 'pct90':
ncp = beast_result.trend.ncp_pct90
if np.isnan(ncp) or ncp == 0:
return beast_result, np.array([])
else: # default: median with mode and pct90 fallback
ncp = beast_result.trend.ncp_median
if np.isnan(ncp) or ncp == 0:
ncp = beast_result.trend.mode
if np.isnan(ncp) or ncp == 0:
ncp = beast_result.trend.ncp_pct90
if np.isnan(ncp) or ncp == 0:
return beast_result, np.array([])
ncp = int(ncp)
cps = beast_result.trend.cp[:ncp]
# Discard mirrored zone changepoints and correct indices
valid_mask = (cps > mirror_len) & (cps <= mirror_len + len(act_rate))
cps = cps[valid_mask] - mirror_len
return beast_result, np.sort(cps)
def bins_and_beast(dates, unit, bin_dur, multiplicator, **kwargs):
start_date = dates.min()
end_date = dates.max()
valid_units = ['hours', 'days']
if unit not in valid_units:
unit = 'days'
bin_dur = 15
if (end_date - start_date) < 15 and unit == 'days':
unit = 'hours'
bin_dur = 12
bin_edges = [end_date]
while bin_edges[-1] > start_date:
bin_edges.append(bin_edges[-1] - (bin_dur / multiplicator))
bin_edges = bin_edges[::-1]
#-------Drop first bin or keep it if >80% of set duration------
first_width_days = bin_edges[1] - start_date
first_width_units = first_width_days * multiplicator
if first_width_units >= 0.8 * bin_dur:
bin_edges[0] = start_date # edge of first bin is at data start
else:
bin_edges = bin_edges[1:] # drop bin 0 (and its events)
#------------Error if remaining bins are fewer than 2------------
if len(bin_edges) < 2:
raise ValueError(
f"Not enough data to form at least one full bin of duration "
f"{bin_dur} {unit}(s) after dropping the partial first bin "
f"({first_width_units:.2f} {unit}(s), below the 80% threshold). "
f"Try a shorter bin_dur or check your input data range."
)
bin_edges_dt = [datenum_to_datetime(d) for d in bin_edges]
bin_counts, _ = np.histogram(dates, bins=bin_edges)
act_rate = [count / ((bin_edges[i + 1] - bin_edges[i]) * multiplicator / bin_dur)
for i, count in enumerate(bin_counts)]
out, cps = apply_beast(act_rate, **kwargs)
if len(cps) > 0:
rt, boundaries = calc_rates(act_rate, cps)
print(f'Changepoints detected at bins: {cps}')
else:
rt = []
boundaries = []
print('-----------------------------------------------------')
print('No changepoints detected by BEAST (Zhao et al., 2019)')
print('-----------------------------------------------------')
return act_rate, bin_counts, bin_edges, bin_edges_dt, out, cps, rt, boundaries, bin_dur, unit
def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max,
m_kde_method, xy_select, grid_dim, xy_win_method, rate_select, time_win_duration,
forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
forecast_select, custom_rate, forecast_len, time_unit, model, products_string, verbose):
# forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
"""
Python application that reads an earthquake catalog and performs seismic hazard forecasting.
Arguments:
@@ -93,8 +330,11 @@ def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max
exclude_low_fxy = False # skip low probability areas of the map
thresh_fxy = 1e-3 # minimum fxy value (location PDF) needed to do PGA estimation (to skip low probability areas); also should scale according to number of grid points
AOI_lat = np.array(AOI_extent[:2])
AOI_lon = np.array(AOI_extent[2:])
# AOI_lat = np.array(AOI_extent[:2])
# AOI_lon = np.array(AOI_extent[2:])
AOI_lat = np.array([51.48, 51.54])
AOI_lon = np.array([16.15, 16.24])
# log user selections
logger.debug(f"User input files\n Catalog: {catalog_file}\n Mc: {mc_file}\n Mag_PDF: {pdf_file}\n Mag: {m_file}")
@@ -345,6 +585,11 @@ verbose: {verbose}")
elif rate_select:
logger.info(f"Activity rate modeling selected")
ncp_choice = 'default'
tcp_max = 5
torder_min = 0
torder_max = 1
time, mag_dummy, lat_dummy, lon_dummy, depth_dummy = read_mat_cat(catalog_file, output_datenum=True)
datenum_data = time # REMEMBER THE DECIMAL DENOTES DAYS
@@ -364,31 +609,50 @@ verbose: {verbose}")
logger.error(msg)
raise Exception(msg)
# Selects dates in datenum format and procceeds to forecast value
start_date = datenum_data[-1] - (2 * time_win_duration / multiplicator)
dates_calc = [date for date in datenum_data if start_date <= date <= datenum_data[-1]]
forecasts, bca_conf95, rate_mean = bootstrap_forecast_rolling(dates_calc, multiplicator)
#-----------data are sorted in case they were not-----------------
sorted_pairs = sorted(zip(datenum_data, mag_data), key=lambda x: x[0])
datenum_data, mag_data = map(list, zip(*sorted_pairs))
# FINAL VALUES OF RATE AND ITS UNCERTAINTY IN THE 5-95 PERCENTILE
unc_bca05 = [ci.low for ci in bca_conf95];
unc_bca95 = [ci.high for ci in bca_conf95]
rate_unc_high = multiplicator / np.array(unc_bca05);
rate_unc_low = multiplicator / np.array(unc_bca95);
rate_forecast = multiplicator / np.median(forecasts) # [per time unit]
#------Forecasted rate is taken from BEAST or is equal to last value if no changepoints detected-----
if len(cps) > 0:
rate_forecast = rt[-1]
last_cp_bin = int(cps[-1])
else:
rate_forecast = act_rate[-1]
last_cp_bin = len(act_rate) - 1
# Plot of forecasted activity rate with previous binned activity rate
act_rate, bin_counts, bin_edges, out, pprs, rt, idx, u_e = calc_bins(np.array(datenum_data), time_unit,
time_win_duration, dates_calc,
last_cp_datenum = bin_edges[last_cp_bin]
dates_calc = [date for date in datenum_data if last_cp_datenum <= date <= datenum_data[-1]]
interevent_times = np.diff(dates_calc)
#------------Use BCa for uncertainty intervals-----------------
forecast, bca_conf95 = bootstrap_forecast(interevent_times)
rate_unc_high = bin_dur / (bca_conf95.low * multiplicator)
rate_unc_low = bin_dur / (bca_conf95.high * multiplicator)
#----------------------Plot------------------------------------
plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
bin_dur, time_unit, multiplicator,
rate_forecast, rate_unc_high, rate_unc_low,
multiplicator, quiet=True, figsize=(14,9))
datenum_data, mag_data)
# Assign probabilities
lambdas, lambdas_perc = lambda_probs(act_rate, dates_calc, bin_edges)
lambdas = np.array(lambdas, dtype='d')
lambdas_perc = np.array(lambdas_perc, dtype='d')
print("\n----------------- Forecast Summary -----------------")
print(f"Forecasted activity rate (next {bin_dur} {time_unit}(s)): {rate_forecast:.4f}")
print(f"95% BCa confidence interval: [{rate_unc_low:.4f}, {rate_unc_high:.4f}]")
print("------------------------------------------------------")
# print("Forecasted activity rates: ", lambdas, "events per", time_unit[:-1])
logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit} with percentages {lambdas_perc}")
lambdas = np.array(rate_forecast/bin_dur, dtype='d')
lambdas_perc = np.array(1.0, dtype='d')
logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit}")
np.savetxt('activity_rate.csv', np.vstack((lambdas, lambdas_perc)).T, header="lambda, percentage",
delimiter=',', fmt='%1.4f')