From bb2e136d42a5156f62adfc5310ac0a5b8dc317c3 Mon Sep 17 00:00:00 2001 From: Krystyna Milian Date: Mon, 26 Feb 2024 23:40:15 +0100 Subject: [PATCH] corrected mseed file pattern --- scripts/mseeds_to_seisbench.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/mseeds_to_seisbench.py b/scripts/mseeds_to_seisbench.py index 9248fdd..b8cf4c5 100644 --- a/scripts/mseeds_to_seisbench.py +++ b/scripts/mseeds_to_seisbench.py @@ -23,16 +23,17 @@ logging.basicConfig(filename="output.out", datefmt='%H:%M:%S', level=logging.DEBUG) +logging.root.setLevel(logging.INFO) logger = logging.getLogger('converter') -def split_events(events, input_path): +def split_events(events, input_path): logger.info("Splitting available events into train, dev and test sets ...") events_stats = pd.DataFrame() events_stats.index.name = "event" for i, event in enumerate(events): - #check if mseed exists + # check if mseed exists actual_picks = 0 for pick in event.picks: trace_params = get_trace_params(pick) @@ -57,6 +58,8 @@ def split_events(events, input_path): else: break + logger.info(f"Split: {events_stats['split'].value_counts()}") + return events_stats @@ -91,7 +94,6 @@ def get_event_params(event): def get_trace_params(pick): - trace_params = { "station_network_code": pick.waveform_id.network_code, "station_code": pick.waveform_id.station_code, @@ -124,6 +126,7 @@ def get_trace_path(input_path, trace_params): path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}" return path + def get_three_channels_trace_paths(input_path, trace_params): year = trace_params["time"].year day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year @@ -132,8 +135,8 @@ def get_three_channels_trace_paths(input_path, trace_params): paths = [] for channel in ["EHE", "EHN", "EHZ"]: - paths.append(f"{input_path}/{year}/{net}/{station}/{channel}.D/{net}.{station}..{channel}.D.{year}.{day_of_year}") - + paths.append( + f"{input_path}/{year}/{net}/{station}/{channel}.D/{net}.{station}..{channel}.D.{year}.{day_of_year:03}") return paths @@ -194,7 +197,9 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path): metadata_path = output_path + "/metadata.csv" waveforms_path = output_path + "/waveforms.hdf5" - logger.debug("Catalog loaded, starting conversion ...") + events_to_convert = events_stats[events_stats['pick_count'] > 0] + + logger.debug("Catalog loaded, starting converting {events_to_convert} events ...") with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer: writer.data_format = { @@ -239,12 +244,10 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format') parser.add_argument('--input_path', type=str, help='Path to mseed files') parser.add_argument('--catalog_path', type=str, help='Path to events catalog in quakeml format') parser.add_argument('--output_path', type=str, help='Path to output files') args = parser.parse_args() - convert_mseed_to_seisbench_format(args.input_path, args.catalog_path, args.output_path)