From deb700560481e6a5749df3230516b615eaa8d7ff Mon Sep 17 00:00:00 2001
From: ftong
Date: Tue, 23 Sep 2025 11:41:08 +0200
Subject: [PATCH] Force use of fork in multiprocessing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From Tomasz Balawajder: "Since we are using a Java service to launch the
Python process, its behavior differs from running the script directly on the
cluster. By default, Dask uses fork() to create worker processes. However,
when running under the JVM, the start method defaults to spawn, which does
not share memory between processes. This caused the slowdown and unexpected
behavior. I’ve forced Python to use fork() in the configuration, and now the
application completes in the same time as when executed with sbatch."
---
 src/seismic_hazard_forecasting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/seismic_hazard_forecasting.py b/src/seismic_hazard_forecasting.py
index ad58232..95297ba 100644
--- a/src/seismic_hazard_forecasting.py
+++ b/src/seismic_hazard_forecasting.py
@@ -69,6 +69,7 @@ def main(catalog_file, mc_file, pdf_file, m_file, m_select, mag_label, mc, m_max
     from matplotlib.contour import ContourSet
     import xml.etree.ElementTree as ET
     import json
+    import multiprocessing as mp
 
     logger = getDefaultLogger('igfash')
 
@@ -448,9 +449,10 @@ verbose: {verbose}")
 
     start = timer()
 
-    use_pp = False
+    use_pp = True
 
     if use_pp: # use dask parallel computing
+        mp.set_start_method("fork", force=True)
         pbar = ProgressBar()
         pbar.register()
         iter = indices