corrected mseed file pattern

This commit is contained in:
Krystyna Milian 2024-02-26 23:40:15 +01:00
parent 87de2e7a6c
commit bb2e136d42

View File

@@ -23,10 +23,11 @@ logging.basicConfig(filename="output.out",
datefmt='%H:%M:%S', datefmt='%H:%M:%S',
level=logging.DEBUG) level=logging.DEBUG)
logging.root.setLevel(logging.INFO)
logger = logging.getLogger('converter') logger = logging.getLogger('converter')
def split_events(events, input_path):
def split_events(events, input_path):
logger.info("Splitting available events into train, dev and test sets ...") logger.info("Splitting available events into train, dev and test sets ...")
events_stats = pd.DataFrame() events_stats = pd.DataFrame()
events_stats.index.name = "event" events_stats.index.name = "event"
@@ -57,6 +58,8 @@ def split_events(events, input_path):
else: else:
break break
logger.info(f"Split: {events_stats['split'].value_counts()}")
return events_stats return events_stats
@@ -91,7 +94,6 @@ def get_event_params(event):
def get_trace_params(pick): def get_trace_params(pick):
trace_params = { trace_params = {
"station_network_code": pick.waveform_id.network_code, "station_network_code": pick.waveform_id.network_code,
"station_code": pick.waveform_id.station_code, "station_code": pick.waveform_id.station_code,
@@ -124,6 +126,7 @@ def get_trace_path(input_path, trace_params):
path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}" path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}"
return path return path
def get_three_channels_trace_paths(input_path, trace_params): def get_three_channels_trace_paths(input_path, trace_params):
year = trace_params["time"].year year = trace_params["time"].year
day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year
@@ -132,8 +135,8 @@ def get_three_channels_trace_paths(input_path, trace_params):
paths = [] paths = []
for channel in ["EHE", "EHN", "EHZ"]: for channel in ["EHE", "EHN", "EHZ"]:
paths.append(f"{input_path}/{year}/{net}/{station}/{channel}.D/{net}.{station}..{channel}.D.{year}.{day_of_year}") paths.append(
f"{input_path}/{year}/{net}/{station}/{channel}.D/{net}.{station}..{channel}.D.{year}.{day_of_year:03}")
return paths return paths
@@ -194,7 +197,9 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
metadata_path = output_path + "/metadata.csv" metadata_path = output_path + "/metadata.csv"
waveforms_path = output_path + "/waveforms.hdf5" waveforms_path = output_path + "/waveforms.hdf5"
logger.debug("Catalog loaded, starting conversion ...") events_to_convert = events_stats[events_stats['pick_count'] > 0]
logger.debug("Catalog loaded, starting converting {events_to_convert} events ...")
with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer: with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer:
writer.data_format = { writer.data_format = {
@@ -239,12 +244,10 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format') parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format')
parser.add_argument('--input_path', type=str, help='Path to mseed files') parser.add_argument('--input_path', type=str, help='Path to mseed files')
parser.add_argument('--catalog_path', type=str, help='Path to events catalog in quakeml format') parser.add_argument('--catalog_path', type=str, help='Path to events catalog in quakeml format')
parser.add_argument('--output_path', type=str, help='Path to output files') parser.add_argument('--output_path', type=str, help='Path to output files')
args = parser.parse_args() args = parser.parse_args()
convert_mseed_to_seisbench_format(args.input_path, args.catalog_path, args.output_path) convert_mseed_to_seisbench_format(args.input_path, args.catalog_path, args.output_path)