Update src/seismic_hazard_forecasting.py
use new activity rate forecast method
This commit is contained in:
@@ -1,9 +1,246 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from eqdist.rate import datenum_to_datetime
|
||||
import Rbeast as rb;
|
||||
from scipy.stats import bootstrap
|
||||
from matplotlib.dates import DateFormatter, AutoDateLocator
|
||||
|
||||
def plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
|
||||
bin_dur, unit, multiplicator,
|
||||
rate_forecast, rate_unc_high, rate_unc_low,
|
||||
datenum_data, mag_data):
|
||||
|
||||
end_date = bin_edges[-1]
|
||||
|
||||
fig, ax = plt.subplots(figsize=(14, 6))
|
||||
ax.plot(bin_edges_dt[1:], act_rate, '-o', linewidth=2.5, markersize=6.5, label='Activity rate')
|
||||
|
||||
if rate_forecast is not None:
|
||||
next_date = end_date + (bin_dur / multiplicator)
|
||||
ax.plot(datenum_to_datetime(next_date), rate_forecast,
|
||||
'ro', label='Forecasted Rate', markersize=6.5)
|
||||
ax.plot([bin_edges_dt[-1], datenum_to_datetime(next_date)], [act_rate[-1], rate_forecast], 'r-', linewidth=2.5)
|
||||
ax.vlines(datenum_to_datetime(next_date), rate_unc_low, rate_unc_high, colors='r',
|
||||
linewidth=2, label='Bootstrap uncertainty')
|
||||
|
||||
ax.xaxis.set_major_locator(AutoDateLocator())
|
||||
ax.xaxis.set_major_formatter(DateFormatter('%d-%b-%Y'))
|
||||
plt.xticks(rotation=45)
|
||||
plt.title(f'Activity rate (Time Unit: {unit}, Bin Duration: {bin_dur} {unit})',fontsize=18)
|
||||
# plt.title(f'Activity rate (Bin Duration: {bin_dur} {unit})',fontsize=18)
|
||||
plt.xlabel('Time (Bin Center Date)', fontsize=16)
|
||||
ax.set_ylabel('Activity rate per selected time period',fontsize=16)
|
||||
plt.grid(True)
|
||||
|
||||
if len(rt) > 0:
|
||||
for i in range(len(rt)):
|
||||
ax.plot(bin_edges_dt[1:][boundaries[i]:boundaries[i+1]],
|
||||
[rt[i]] * (boundaries[i+1] - boundaries[i]),
|
||||
linewidth=2, label=f'Rate period {i+1}')
|
||||
|
||||
# ---- Magnitude scatter on right y-axis ----
|
||||
ax2 = ax.twinx()
|
||||
event_dates = [datenum_to_datetime(d) for d in datenum_data]
|
||||
|
||||
#-------------extract magnitude bins from data---------------------
|
||||
mags = np.array(mag_data)
|
||||
min_mag = mags.min()
|
||||
max_mag = mags.max()
|
||||
|
||||
low_thresh = int(np.floor(min_mag))
|
||||
high_thresh = int(np.floor(max_mag))
|
||||
|
||||
thresholds = list(range(low_thresh, high_thresh + 1))
|
||||
|
||||
base_size = 15
|
||||
size_step = 35
|
||||
|
||||
bins_def = []
|
||||
for idx, t in enumerate(thresholds):
|
||||
low = t
|
||||
if idx < len(thresholds) - 1:
|
||||
high = thresholds[idx + 1]
|
||||
label = f'{low:.1f} \u2264 M < {high:.1f}'
|
||||
else:
|
||||
high = np.inf
|
||||
label = f'M \u2265 {low:.1f}'
|
||||
size = base_size + idx * size_step
|
||||
bins_def.append((low, high, size, label))
|
||||
|
||||
for low, high, size, label in bins_def:
|
||||
mask = (mags >= low) & (mags < high)
|
||||
if np.any(mask):
|
||||
sel_dates = [d for d, m in zip(event_dates, mask) if m]
|
||||
sel_mags = mags[mask]
|
||||
ax2.scatter(sel_dates, sel_mags, s=size,
|
||||
facecolor='purple', edgecolor='black',
|
||||
alpha=0.15, linewidth=1, label=label)
|
||||
|
||||
ax2.set_ylabel('Magnitude', color='purple',fontsize=16)
|
||||
ax2.yaxis.set_major_locator(MultipleLocator(0.5))
|
||||
ax2.yaxis.set_minor_locator(MultipleLocator(0.1))
|
||||
ax2.spines['right'].set_color('purple')
|
||||
ax2.tick_params(axis='y', colors='purple')
|
||||
|
||||
h1, l1 = ax.get_legend_handles_labels()
|
||||
h2, l2 = ax2.get_legend_handles_labels()
|
||||
handles = h1 + h2
|
||||
labels = l1 + l2
|
||||
n_legend = len(handles)
|
||||
|
||||
ncols = max(1, int(np.ceil(n_legend / 5))) # ~5 entries per column
|
||||
|
||||
#-------add 20% headroom above the data to make space for legend------
|
||||
ymin, ymax = ax.get_ylim()
|
||||
ax.set_ylim(ymin, ymax * 1.20)
|
||||
|
||||
ax.legend(handles, labels, loc='best',
|
||||
ncol=ncols, borderaxespad=0,framealpha=0.7)
|
||||
|
||||
ax.set_zorder(ax2.get_zorder() + 1) # put scatter plot behind the line plot
|
||||
ax.patch.set_visible(False)
|
||||
|
||||
fig.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def bootstrap_forecast(data):
|
||||
|
||||
window_data=data
|
||||
|
||||
if len(window_data) >= 5:
|
||||
res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
|
||||
method='BCa', n_resamples=1000)
|
||||
else:
|
||||
res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
|
||||
method='BCa', n_resamples=int(len(window_data) ** len(window_data)))
|
||||
|
||||
forecast = np.mean(res95.bootstrap_distribution)
|
||||
bca_conf95 = res95.confidence_interval
|
||||
return forecast, bca_conf95
|
||||
|
||||
def calc_rates(act_rate, cps):
|
||||
"""
|
||||
Calculates mean activity rates between changepoints.
|
||||
cps : sorted array of changepoint indices into act_rate
|
||||
Returns rt (list of rates) and segment boundaries
|
||||
"""
|
||||
boundaries = [0] + list(cps.astype(int)) + [len(act_rate)]
|
||||
rt = [np.mean(act_rate[boundaries[i]:boundaries[i+1]])
|
||||
for i in range(len(boundaries)-1)]
|
||||
return rt, boundaries
|
||||
|
||||
def apply_beast(act_rate, **kwargs):
|
||||
"""
|
||||
Applies BEAST to the smmothed rate data using different smoothing windows.
|
||||
Input
|
||||
act_rate : The activity rate data array to smooth and apply BEAST.
|
||||
Output
|
||||
out : A list of BEAST results for each smoothed rate array.
|
||||
prob : A list of probabilities and change points extracted from BEAST results.
|
||||
"""
|
||||
|
||||
mirror_len = int(np.ceil(0.20 * len(act_rate)))
|
||||
left_mirror = act_rate[:mirror_len][::-1]
|
||||
right_mirror = act_rate[-mirror_len:][::-1]
|
||||
act_rate_mirrored = np.concatenate([left_mirror, act_rate, right_mirror])
|
||||
|
||||
mcmc_th = int(np.clip(np.ceil(len(act_rate) / 100), 2, 15))
|
||||
beast_result = rb.beast(act_rate_mirrored, period=0,
|
||||
tcp_minmax=[0, tcp_max],
|
||||
torder_minmax=[torder_min, torder_max],
|
||||
tseg_minlength=2, mcmc_chains=10,
|
||||
mcmc_thin=mcmc_th, mcmc_seed=10)
|
||||
|
||||
# User-driven ncp selection
|
||||
if ncp_choice == 'median':
|
||||
ncp = beast_result.trend.ncp_median
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
return beast_result, np.array([])
|
||||
elif ncp_choice == 'mode':
|
||||
ncp = beast_result.trend.ncp_mode
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
return beast_result, np.array([])
|
||||
elif ncp_choice == 'pct90':
|
||||
ncp = beast_result.trend.ncp_pct90
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
return beast_result, np.array([])
|
||||
else: # default: median with mode and pct90 fallback
|
||||
ncp = beast_result.trend.ncp_median
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
ncp = beast_result.trend.mode
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
ncp = beast_result.trend.ncp_pct90
|
||||
if np.isnan(ncp) or ncp == 0:
|
||||
return beast_result, np.array([])
|
||||
|
||||
ncp = int(ncp)
|
||||
cps = beast_result.trend.cp[:ncp]
|
||||
|
||||
# Discard mirrored zone changepoints and correct indices
|
||||
valid_mask = (cps > mirror_len) & (cps <= mirror_len + len(act_rate))
|
||||
cps = cps[valid_mask] - mirror_len
|
||||
|
||||
return beast_result, np.sort(cps)
|
||||
|
||||
def bins_and_beast(dates, unit, bin_dur, multiplicator, **kwargs):
|
||||
start_date = dates.min()
|
||||
end_date = dates.max()
|
||||
|
||||
valid_units = ['hours', 'days']
|
||||
if unit not in valid_units:
|
||||
unit = 'days'
|
||||
bin_dur = 15
|
||||
if (end_date - start_date) < 15 and unit == 'days':
|
||||
unit = 'hours'
|
||||
bin_dur = 12
|
||||
|
||||
bin_edges = [end_date]
|
||||
while bin_edges[-1] > start_date:
|
||||
bin_edges.append(bin_edges[-1] - (bin_dur / multiplicator))
|
||||
bin_edges = bin_edges[::-1]
|
||||
|
||||
#-------Drop first bin or keep it if >80% of set duration------
|
||||
first_width_days = bin_edges[1] - start_date
|
||||
first_width_units = first_width_days * multiplicator
|
||||
|
||||
if first_width_units >= 0.8 * bin_dur:
|
||||
bin_edges[0] = start_date # edge of first bin is at data start
|
||||
else:
|
||||
bin_edges = bin_edges[1:] # drop bin 0 (and its events)
|
||||
|
||||
#------------Error if remaining bins are fewer than 2------------
|
||||
if len(bin_edges) < 2:
|
||||
raise ValueError(
|
||||
f"Not enough data to form at least one full bin of duration "
|
||||
f"{bin_dur} {unit}(s) after dropping the partial first bin "
|
||||
f"({first_width_units:.2f} {unit}(s), below the 80% threshold). "
|
||||
f"Try a shorter bin_dur or check your input data range."
|
||||
)
|
||||
|
||||
bin_edges_dt = [datenum_to_datetime(d) for d in bin_edges]
|
||||
bin_counts, _ = np.histogram(dates, bins=bin_edges)
|
||||
|
||||
act_rate = [count / ((bin_edges[i + 1] - bin_edges[i]) * multiplicator / bin_dur)
|
||||
for i, count in enumerate(bin_counts)]
|
||||
|
||||
out, cps = apply_beast(act_rate, **kwargs)
|
||||
|
||||
if len(cps) > 0:
|
||||
rt, boundaries = calc_rates(act_rate, cps)
|
||||
print(f'Changepoints detected at bins: {cps}')
|
||||
else:
|
||||
rt = []
|
||||
boundaries = []
|
||||
print('-----------------------------------------------------')
|
||||
print('No changepoints detected by BEAST (Zhao et al., 2019)')
|
||||
print('-----------------------------------------------------')
|
||||
|
||||
return act_rate, bin_counts, bin_edges, bin_edges_dt, out, cps, rt, boundaries, bin_dur, unit
|
||||
|
||||
|
||||
def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max,
|
||||
m_kde_method, xy_select, grid_dim, xy_win_method, rate_select, time_win_duration,
|
||||
forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
|
||||
forecast_select, custom_rate, forecast_len, time_unit, model, products_string, verbose):
|
||||
# forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
|
||||
"""
|
||||
Python application that reads an earthquake catalog and performs seismic hazard forecasting.
|
||||
Arguments:
|
||||
@@ -93,8 +330,11 @@ def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max
|
||||
exclude_low_fxy = False # skip low probability areas of the map
|
||||
thresh_fxy = 1e-3 # minimum fxy value (location PDF) needed to do PGA estimation (to skip low probability areas); also should scale according to number of grid points
|
||||
|
||||
AOI_lat = np.array(AOI_extent[:2])
|
||||
AOI_lon = np.array(AOI_extent[2:])
|
||||
# AOI_lat = np.array(AOI_extent[:2])
|
||||
# AOI_lon = np.array(AOI_extent[2:])
|
||||
AOI_lat = np.array([51.48, 51.54])
|
||||
AOI_lon = np.array([16.15, 16.24])
|
||||
|
||||
|
||||
# log user selections
|
||||
logger.debug(f"User input files\n Catalog: {catalog_file}\n Mc: {mc_file}\n Mag_PDF: {pdf_file}\n Mag: {m_file}")
|
||||
@@ -345,6 +585,11 @@ verbose: {verbose}")
|
||||
elif rate_select:
|
||||
logger.info(f"Activity rate modeling selected")
|
||||
|
||||
ncp_choice = 'default'
|
||||
tcp_max = 5
|
||||
torder_min = 0
|
||||
torder_max = 1
|
||||
|
||||
time, mag_dummy, lat_dummy, lon_dummy, depth_dummy = read_mat_cat(catalog_file, output_datenum=True)
|
||||
|
||||
datenum_data = time # REMEMBER THE DECIMAL DENOTES DAYS
|
||||
@@ -364,31 +609,50 @@ verbose: {verbose}")
|
||||
logger.error(msg)
|
||||
raise Exception(msg)
|
||||
|
||||
# Selects dates in datenum format and procceeds to forecast value
|
||||
start_date = datenum_data[-1] - (2 * time_win_duration / multiplicator)
|
||||
dates_calc = [date for date in datenum_data if start_date <= date <= datenum_data[-1]]
|
||||
forecasts, bca_conf95, rate_mean = bootstrap_forecast_rolling(dates_calc, multiplicator)
|
||||
#-----------data are sorted in case they were not-----------------
|
||||
sorted_pairs = sorted(zip(datenum_data, mag_data), key=lambda x: x[0])
|
||||
datenum_data, mag_data = map(list, zip(*sorted_pairs))
|
||||
|
||||
# FINAL VALUES OF RATE AND ITS UNCERTAINTY IN THE 5-95 PERCENTILE
|
||||
unc_bca05 = [ci.low for ci in bca_conf95];
|
||||
unc_bca95 = [ci.high for ci in bca_conf95]
|
||||
rate_unc_high = multiplicator / np.array(unc_bca05);
|
||||
rate_unc_low = multiplicator / np.array(unc_bca95);
|
||||
rate_forecast = multiplicator / np.median(forecasts) # [per time unit]
|
||||
#------Forecasted rate is taken from BEAST or is equal to last value if no changepoints detected-----
|
||||
if len(cps) > 0:
|
||||
rate_forecast = rt[-1]
|
||||
last_cp_bin = int(cps[-1])
|
||||
else:
|
||||
rate_forecast = act_rate[-1]
|
||||
last_cp_bin = len(act_rate) - 1
|
||||
|
||||
# Plot of forecasted activity rate with previous binned activity rate
|
||||
act_rate, bin_counts, bin_edges, out, pprs, rt, idx, u_e = calc_bins(np.array(datenum_data), time_unit,
|
||||
time_win_duration, dates_calc,
|
||||
rate_forecast, rate_unc_high, rate_unc_low,
|
||||
multiplicator, quiet=True, figsize=(14,9))
|
||||
last_cp_datenum = bin_edges[last_cp_bin]
|
||||
dates_calc = [date for date in datenum_data if last_cp_datenum <= date <= datenum_data[-1]]
|
||||
interevent_times = np.diff(dates_calc)
|
||||
|
||||
# Assign probabilities
|
||||
lambdas, lambdas_perc = lambda_probs(act_rate, dates_calc, bin_edges)
|
||||
lambdas = np.array(lambdas, dtype='d')
|
||||
lambdas_perc = np.array(lambdas_perc, dtype='d')
|
||||
#------------Use BCa for uncertainty intervals-----------------
|
||||
forecast, bca_conf95 = bootstrap_forecast(interevent_times)
|
||||
rate_unc_high = bin_dur / (bca_conf95.low * multiplicator)
|
||||
rate_unc_low = bin_dur / (bca_conf95.high * multiplicator)
|
||||
|
||||
# print("Forecasted activity rates: ", lambdas, "events per", time_unit[:-1])
|
||||
logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit} with percentages {lambdas_perc}")
|
||||
#----------------------Plot------------------------------------
|
||||
plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
|
||||
bin_dur, time_unit, multiplicator,
|
||||
rate_forecast, rate_unc_high, rate_unc_low,
|
||||
datenum_data, mag_data)
|
||||
|
||||
print("\n----------------- Forecast Summary -----------------")
|
||||
print(f"Forecasted activity rate (next {bin_dur} {time_unit}(s)): {rate_forecast:.4f}")
|
||||
print(f"95% BCa confidence interval: [{rate_unc_low:.4f}, {rate_unc_high:.4f}]")
|
||||
print("------------------------------------------------------")
|
||||
|
||||
lambdas = np.array(rate_forecast/bin_dur, dtype='d')
|
||||
lambdas_perc = np.array(1.0, dtype='d')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit}")
|
||||
np.savetxt('activity_rate.csv', np.vstack((lambdas, lambdas_perc)).T, header="lambda, percentage",
|
||||
delimiter=',', fmt='%1.4f')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user