From 46ff6b8a6c805a950eb8ff8e6254fe1d650bcd04 Mon Sep 17 00:00:00 2001
From: ftong <95+ftong@noreply.example.org>
Date: Fri, 19 Jun 2026 14:29:44 +0200
Subject: [PATCH] Update src/seismic_hazard_forecasting.py

use new activity rate forecast method
---
 src/seismic_hazard_forecasting.py | 314 +++++++++++++++++++++++++++++++++++---
 1 file changed, 289 insertions(+), 25 deletions(-)

diff --git a/src/seismic_hazard_forecasting.py b/src/seismic_hazard_forecasting.py
index 3602e52..369ed95 100644
--- a/src/seismic_hazard_forecasting.py
+++ b/src/seismic_hazard_forecasting.py
@@ -1,9 +1,246 @@
 # -*- coding: utf-8 -*-
+from eqdist.rate import datenum_to_datetime
+import Rbeast as rb;
+from scipy.stats import bootstrap
+from matplotlib.dates import DateFormatter, AutoDateLocator
+
+def plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
+                  bin_dur, unit, multiplicator,
+                  rate_forecast, rate_unc_high, rate_unc_low,
+                  datenum_data, mag_data):
+
+    end_date = bin_edges[-1]
+
+    fig, ax = plt.subplots(figsize=(14, 6))
+    ax.plot(bin_edges_dt[1:], act_rate, '-o', linewidth=2.5, markersize=6.5, label='Activity rate')
+
+    if rate_forecast is not None:
+        next_date = end_date + (bin_dur / multiplicator)
+        ax.plot(datenum_to_datetime(next_date), rate_forecast, 
+                'ro', label='Forecasted Rate', markersize=6.5)
+        ax.plot([bin_edges_dt[-1], datenum_to_datetime(next_date)], [act_rate[-1], rate_forecast], 'r-', linewidth=2.5)
+        ax.vlines(datenum_to_datetime(next_date), rate_unc_low, rate_unc_high, colors='r',
+                  linewidth=2, label='Bootstrap uncertainty')
+
+    ax.xaxis.set_major_locator(AutoDateLocator())
+    ax.xaxis.set_major_formatter(DateFormatter('%d-%b-%Y'))
+    plt.xticks(rotation=45)
+    plt.title(f'Activity rate (Time Unit: {unit}, Bin Duration: {bin_dur} {unit})',fontsize=18)
+    # plt.title(f'Activity rate (Bin Duration: {bin_dur} {unit})',fontsize=18)
+    plt.xlabel('Time (Bin Center Date)', fontsize=16)
+    ax.set_ylabel('Activity rate per selected time period',fontsize=16)
+    plt.grid(True)
+
+    if len(rt) > 0:
+        for i in range(len(rt)):
+            ax.plot(bin_edges_dt[1:][boundaries[i]:boundaries[i+1]],
+                    [rt[i]] * (boundaries[i+1] - boundaries[i]),
+                    linewidth=2, label=f'Rate period {i+1}')
+
+    # ---- Magnitude scatter on right y-axis ----
+    ax2 = ax.twinx()
+    event_dates = [datenum_to_datetime(d) for d in datenum_data]
+
+    #-------------extract magnitude bins from data---------------------
+    mags = np.array(mag_data)
+    min_mag = mags.min()
+    max_mag = mags.max()
+
+    low_thresh = int(np.floor(min_mag))
+    high_thresh = int(np.floor(max_mag))
+
+    thresholds = list(range(low_thresh, high_thresh + 1))
+
+    base_size = 15
+    size_step = 35
+
+    bins_def = []
+    for idx, t in enumerate(thresholds):
+        low = t
+        if idx < len(thresholds) - 1:
+            high = thresholds[idx + 1]
+            label = f'{low:.1f} \u2264 M < {high:.1f}'
+        else:
+            high = np.inf
+            label = f'M \u2265 {low:.1f}'
+        size = base_size + idx * size_step
+        bins_def.append((low, high, size, label))
+
+    for low, high, size, label in bins_def:
+        mask = (mags >= low) & (mags < high)
+        if np.any(mask):
+            sel_dates = [d for d, m in zip(event_dates, mask) if m]
+            sel_mags = mags[mask]
+            ax2.scatter(sel_dates, sel_mags, s=size,
+                        facecolor='purple', edgecolor='black',
+                        alpha=0.15, linewidth=1, label=label)
+
+    ax2.set_ylabel('Magnitude', color='purple',fontsize=16)
+    ax2.yaxis.set_major_locator(MultipleLocator(0.5))
+    ax2.yaxis.set_minor_locator(MultipleLocator(0.1))
+    ax2.spines['right'].set_color('purple')
+    ax2.tick_params(axis='y', colors='purple')
+
+    h1, l1 = ax.get_legend_handles_labels()
+    h2, l2 = ax2.get_legend_handles_labels()
+    handles = h1 + h2
+    labels = l1 + l2
+    n_legend = len(handles)
+
+    ncols = max(1, int(np.ceil(n_legend / 5)))  # ~5 entries per column
+    
+    #-------add 20% headroom above the data to make space for legend------
+    ymin, ymax = ax.get_ylim()
+    ax.set_ylim(ymin, ymax * 1.20) 
+    
+    ax.legend(handles, labels, loc='best',
+          ncol=ncols, borderaxespad=0,framealpha=0.7)
+
+    ax.set_zorder(ax2.get_zorder() + 1) # put scatter plot behind the line plot
+    ax.patch.set_visible(False)
+    
+    fig.tight_layout()
+    plt.show()
+
+def bootstrap_forecast(data):
+
+    window_data=data
+
+    if len(window_data) >= 5:
+        res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
+                          method='BCa', n_resamples=1000)
+    else:
+        res95 = bootstrap((window_data,), np.mean, confidence_level=0.95,
+                          method='BCa', n_resamples=int(len(window_data) ** len(window_data)))
+
+    forecast = np.mean(res95.bootstrap_distribution)
+    bca_conf95 = res95.confidence_interval
+    return forecast, bca_conf95
+
+def calc_rates(act_rate, cps):
+    """
+    Calculates mean activity rates between changepoints.
+    cps : sorted array of changepoint indices into act_rate
+    Returns rt (list of rates) and segment boundaries
+    """
+    boundaries = [0] + list(cps.astype(int)) + [len(act_rate)]
+    rt = [np.mean(act_rate[boundaries[i]:boundaries[i+1]]) 
+          for i in range(len(boundaries)-1)]
+    return rt, boundaries
+
+def apply_beast(act_rate, **kwargs):
+    """
+    Applies BEAST to the smmothed rate data using different smoothing windows.
+    Input
+    act_rate : The activity rate data array to smooth and apply BEAST.
+    Output
+    out : A list of BEAST results for each smoothed rate array.
+    prob : A list of probabilities and change points extracted from BEAST results.
+    """
+
+    mirror_len = int(np.ceil(0.20 * len(act_rate)))
+    left_mirror = act_rate[:mirror_len][::-1]
+    right_mirror = act_rate[-mirror_len:][::-1]
+    act_rate_mirrored = np.concatenate([left_mirror, act_rate, right_mirror])
+
+    mcmc_th = int(np.clip(np.ceil(len(act_rate) / 100), 2, 15))
+    beast_result = rb.beast(act_rate_mirrored, period=0,
+                            tcp_minmax=[0, tcp_max],
+                            torder_minmax=[torder_min, torder_max],
+                            tseg_minlength=2, mcmc_chains=10,
+                            mcmc_thin=mcmc_th, mcmc_seed=10)
+
+    # User-driven ncp selection
+    if ncp_choice == 'median':
+        ncp = beast_result.trend.ncp_median
+        if np.isnan(ncp) or ncp == 0:
+            return beast_result, np.array([])
+    elif ncp_choice == 'mode':
+        ncp = beast_result.trend.ncp_mode
+        if np.isnan(ncp) or ncp == 0:
+            return beast_result, np.array([])   
+    elif ncp_choice == 'pct90':
+        ncp = beast_result.trend.ncp_pct90
+        if np.isnan(ncp) or ncp == 0:
+            return beast_result, np.array([])
+    else:  # default: median with mode and pct90 fallback
+        ncp = beast_result.trend.ncp_median
+        if np.isnan(ncp) or ncp == 0:
+            ncp = beast_result.trend.mode
+        if np.isnan(ncp) or ncp == 0:
+            ncp = beast_result.trend.ncp_pct90
+        if np.isnan(ncp) or ncp == 0:    
+            return beast_result, np.array([])
+
+    ncp = int(ncp)
+    cps = beast_result.trend.cp[:ncp]
+
+    # Discard mirrored zone changepoints and correct indices
+    valid_mask = (cps > mirror_len) & (cps <= mirror_len + len(act_rate))
+    cps = cps[valid_mask] - mirror_len
+
+    return beast_result, np.sort(cps)
+
+def bins_and_beast(dates, unit, bin_dur, multiplicator, **kwargs):
+    start_date = dates.min()
+    end_date = dates.max()
+
+    valid_units = ['hours', 'days']
+    if unit not in valid_units:
+        unit = 'days'
+        bin_dur = 15
+    if (end_date - start_date) < 15 and unit == 'days':
+        unit = 'hours'
+        bin_dur = 12
+
+    bin_edges = [end_date]
+    while bin_edges[-1] > start_date:
+        bin_edges.append(bin_edges[-1] - (bin_dur / multiplicator))
+    bin_edges = bin_edges[::-1]
+
+    #-------Drop first bin or keep it if >80% of set duration------
+    first_width_days = bin_edges[1] - start_date
+    first_width_units = first_width_days * multiplicator
+
+    if first_width_units >= 0.8 * bin_dur:
+        bin_edges[0] = start_date  # edge of first bin is at data start
+    else:
+        bin_edges = bin_edges[1:]  # drop bin 0 (and its events)
+    
+    #------------Error if remaining bins are fewer than 2------------
+    if len(bin_edges) < 2:
+        raise ValueError(
+            f"Not enough data to form at least one full bin of duration "
+            f"{bin_dur} {unit}(s) after dropping the partial first bin "
+            f"({first_width_units:.2f} {unit}(s), below the 80% threshold). "
+            f"Try a shorter bin_dur or check your input data range."
+        )
+    
+    bin_edges_dt = [datenum_to_datetime(d) for d in bin_edges]
+    bin_counts, _ = np.histogram(dates, bins=bin_edges)
+
+    act_rate = [count / ((bin_edges[i + 1] - bin_edges[i]) * multiplicator / bin_dur)
+                for i, count in enumerate(bin_counts)]
+
+    out, cps = apply_beast(act_rate, **kwargs)
+
+    if len(cps) > 0:
+        rt, boundaries = calc_rates(act_rate, cps)
+        print(f'Changepoints detected at bins: {cps}')
+    else:
+        rt = []
+        boundaries = []
+        print('-----------------------------------------------------')
+        print('No changepoints detected by BEAST (Zhao et al., 2019)')
+        print('-----------------------------------------------------')
+
+    return act_rate, bin_counts, bin_edges, bin_edges_dt, out, cps, rt, boundaries, bin_dur, unit
 
 
 def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max,
          m_kde_method, xy_select, grid_dim, xy_win_method, rate_select, time_win_duration,
-         forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
+         forecast_select, custom_rate, forecast_len, time_unit, model, products_string, verbose):
+#         forecast_select, custom_rate, forecast_len, time_unit, AOI_extent, model, products_string, verbose):
     """
     Python application that reads an earthquake catalog and performs seismic hazard forecasting.
     Arguments:
@@ -93,8 +330,11 @@ def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max
     exclude_low_fxy = False # skip low probability areas of the map
     thresh_fxy = 1e-3  # minimum fxy value (location PDF) needed to do PGA estimation (to skip low probability areas); also should scale according to number of grid points
 
-    AOI_lat = np.array(AOI_extent[:2]) 
-    AOI_lon = np.array(AOI_extent[2:])
+#    AOI_lat = np.array(AOI_extent[:2]) 
+#    AOI_lon = np.array(AOI_extent[2:])
+    AOI_lat = np.array([51.48, 51.54])
+    AOI_lon = np.array([16.15, 16.24])
+
     
     # log user selections
     logger.debug(f"User input files\n Catalog: {catalog_file}\n Mc: {mc_file}\n Mag_PDF: {pdf_file}\n Mag: {m_file}")
@@ -344,7 +584,12 @@ verbose: {verbose}")
 
     elif rate_select:
         logger.info(f"Activity rate modeling selected")
-
+        
+        ncp_choice = 'default'
+        tcp_max = 5
+        torder_min = 0
+        torder_max = 1
+        
         time, mag_dummy, lat_dummy, lon_dummy, depth_dummy = read_mat_cat(catalog_file, output_datenum=True)
 
         datenum_data = time  # REMEMBER THE DECIMAL DENOTES DAYS
@@ -364,31 +609,50 @@ verbose: {verbose}")
             logger.error(msg)
             raise Exception(msg)
 
-        # Selects dates in datenum format and procceeds to forecast value
-        start_date = datenum_data[-1] - (2 * time_win_duration / multiplicator)
-        dates_calc = [date for date in datenum_data if start_date <= date <= datenum_data[-1]]
-        forecasts, bca_conf95, rate_mean = bootstrap_forecast_rolling(dates_calc, multiplicator)
+        #-----------data are sorted in case they were not-----------------
+        sorted_pairs = sorted(zip(datenum_data, mag_data), key=lambda x: x[0])
+        datenum_data, mag_data = map(list, zip(*sorted_pairs))
 
-        # FINAL VALUES OF RATE AND ITS UNCERTAINTY IN THE 5-95 PERCENTILE
-        unc_bca05 = [ci.low for ci in bca_conf95];
-        unc_bca95 = [ci.high for ci in bca_conf95]
-        rate_unc_high = multiplicator / np.array(unc_bca05);
-        rate_unc_low = multiplicator / np.array(unc_bca95);
-        rate_forecast = multiplicator / np.median(forecasts)  # [per time unit]
+        #------Forecasted rate is taken from BEAST or is equal to last value if no changepoints detected-----
+        if len(cps) > 0:
+            rate_forecast = rt[-1]
+            last_cp_bin = int(cps[-1])
+        else:
+            rate_forecast = act_rate[-1]
+            last_cp_bin = len(act_rate) - 1
 
-        # Plot of forecasted activity rate with previous binned activity rate
-        act_rate, bin_counts, bin_edges, out, pprs, rt, idx, u_e = calc_bins(np.array(datenum_data), time_unit,
-                                                                             time_win_duration, dates_calc,
-                                                                             rate_forecast, rate_unc_high, rate_unc_low,
-                                                                             multiplicator, quiet=True, figsize=(14,9))
+        last_cp_datenum = bin_edges[last_cp_bin]
+        dates_calc = [date for date in datenum_data if last_cp_datenum <= date <= datenum_data[-1]]
+        interevent_times = np.diff(dates_calc)
 
-        # Assign probabilities 
-        lambdas, lambdas_perc = lambda_probs(act_rate, dates_calc, bin_edges)
-        lambdas = np.array(lambdas, dtype='d')
-        lambdas_perc = np.array(lambdas_perc, dtype='d')
+        #------------Use BCa for uncertainty intervals-----------------
+        forecast, bca_conf95 = bootstrap_forecast(interevent_times)
+        rate_unc_high = bin_dur / (bca_conf95.low * multiplicator)
+        rate_unc_low = bin_dur / (bca_conf95.high * multiplicator)
+
+        #----------------------Plot------------------------------------
+        plot_results(act_rate, bin_edges, bin_edges_dt, rt, boundaries,
+                      bin_dur, time_unit, multiplicator,
+                      rate_forecast, rate_unc_high, rate_unc_low,
+                      datenum_data, mag_data)
+
+        print("\n----------------- Forecast Summary -----------------")
+        print(f"Forecasted activity rate (next {bin_dur} {time_unit}(s)): {rate_forecast:.4f}")
+        print(f"95% BCa confidence interval: [{rate_unc_low:.4f}, {rate_unc_high:.4f}]")
+        print("------------------------------------------------------")
         
-        # print("Forecasted activity rates: ", lambdas, "events per", time_unit[:-1])
-        logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit} with percentages {lambdas_perc}")
+        lambdas = np.array(rate_forecast/bin_dur, dtype='d')
+        lambdas_perc = np.array(1.0, dtype='d')
+
+
+
+
+
+
+
+        
+        
+        logger.info(f"Forecasted activity rates: {lambdas} events per {time_unit}")
         np.savetxt('activity_rate.csv', np.vstack((lambdas, lambdas_perc)).T, header="lambda, percentage",
                    delimiter=',', fmt='%1.4f')