Fixed generation of the SeisBench dataset: traces are now saved with both phase annotations and three channels (EHE, EHN, EHZ).
parent 4c2679a005
commit 4658b8d866
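The diff below reworks convert_mseed_to_seisbench_format so that, for every station, all picks of an event are grouped together, the EHE/EHN/EHZ day files are read into one stream, and each phase pick is stored as an arrival-sample index on a single three-component trace. As a quick illustration of what one written trace looks like, here is a minimal sketch of the SeisBench writer usage; the output paths, input file, station code, and pick times are placeholders, and stacking the components with numpy is a simplification of the converter's own stream handling:

```python
import numpy as np
import obspy
import seisbench.data as sbd

# Placeholder output locations; the converter builds the real ones from its arguments.
metadata_path = "output/metadata.csv"
waveforms_path = "output/waveforms.hdf5"

with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer:
    # Same data format the converter declares: channel-first arrays in Z, N, E order.
    writer.data_format = {"dimension_order": "CW", "component_order": "ZNE"}

    # A stream holding the EHZ, EHN and EHE traces of one station around one event
    # (placeholder file; the converter reads the per-channel day files instead).
    stream = obspy.read("station_three_components.mseed")
    t_start = min(tr.stats.starttime for tr in stream)
    sampling_rate = stream[0].stats.sampling_rate

    # Stack the three components into a (channels, samples) array in ZNE order;
    # assumes all three channels are present and already share the same window.
    data = np.stack([stream.select(channel=f"EH{c}")[0].data for c in "ZNE"])

    # Both phase picks become sample indices relative to the trace start
    # (pick times are made up here; the converter takes them from the catalog).
    p_pick = t_start + 5.0
    s_pick = t_start + 9.0
    metadata = {
        "station_code": stream[0].stats.station,
        "trace_sampling_rate_hz": sampling_rate,
        "trace_start_time": str(t_start),
        "trace_P_arrival_sample": int((p_pick - t_start) * sampling_rate),
        "trace_S_arrival_sample": int((s_pick - t_start) * sampling_rate),
    }
    writer.add_trace(metadata, data)
```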
@@ -1,54 +1,30 @@
+"""
+-----------------
+Copyright © 2023 ACK Cyfronet AGH, Poland.
+This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
+-----------------
+"""
+
 import os
 import pandas as pd
-import glob
-from pathlib import Path
 
 import obspy
 from obspy.core.event import read_events
 
-import seisbench
 import seisbench.data as sbd
 import seisbench.util as sbu
-import sys
 import logging
 import argparse
 
 
 logging.basicConfig(filename="output.out",
                     filemode='a',
                     format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                     datefmt='%H:%M:%S',
                     level=logging.DEBUG)
 
-
-
 logger = logging.getLogger('converter')
 
-def create_traces_catalog(directory, years):
-    for year in years:
-        directory = f"{directory}/{year}"
-        files = glob.glob(directory)
-        traces = []
-        for i, f in enumerate(files):
-            st = obspy.read(f)
-
-            for tr in st.traces:
-                # trace_id = tr.id
-                # start = tr.meta.starttime
-                # end = tr.meta.endtime
-
-                trs = pd.Series({
-                    'trace_id': tr.id,
-                    'trace_st': tr.meta.starttime,
-                    'trace_end': tr.meta.endtime,
-                    'stream_fname': f
-                })
-                traces.append(trs)
-
-        traces_catalog = pd.DataFrame(pd.concat(traces)).transpose()
-        traces_catalog.to_csv("data/bogdanka/traces_catalog.csv", append=True, index=False)
-
-
 def split_events(events, input_path):
 
     logger.info("Splitting available events into train, dev and test sets ...")
@@ -61,6 +37,7 @@ def split_events(events, input_path):
         for pick in event.picks:
             trace_params = get_trace_params(pick)
             trace_path = get_trace_path(input_path, trace_params)
+
             if os.path.isfile(trace_path):
                 actual_picks += 1
 
@@ -87,7 +64,6 @@ def get_event_params(event):
     origin = event.preferred_origin()
     if origin is None:
         return {}
-    # print(origin)
 
     mag = event.preferred_magnitude()
 
@@ -115,12 +91,10 @@ def get_event_params(event):
 
 
 def get_trace_params(pick):
-    net = pick.waveform_id.network_code
-    sta = pick.waveform_id.station_code
 
     trace_params = {
-        "station_network_code": net,
-        "station_code": sta,
+        "station_network_code": pick.waveform_id.network_code,
+        "station_code": pick.waveform_id.station_code,
         "trace_channel": pick.waveform_id.channel_code,
        "station_location_code": pick.waveform_id.location_code,
         "time": pick.time
@@ -134,7 +108,6 @@ def find_trace(pick_time, traces):
         if pick_time > tr.stats.endtime:
             continue
         if pick_time >= tr.stats.starttime:
-            # print(pick_time, " - selected trace: ", tr)
             return tr
 
     logger.warning(f"no matching trace for peak: {pick_time}")
@@ -151,13 +124,25 @@ def get_trace_path(input_path, trace_params):
     path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}"
     return path
 
+def get_three_channels_trace_paths(input_path, trace_params):
+    year = trace_params["time"].year
+    day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year
+    net = trace_params["station_network_code"]
+    station = trace_params["station_code"]
+
+    paths = []
+    for channel in ["EHE", "EHN", "EHZ"]:
+        paths.append(f"{input_path}/{year}/{net}/{station}/{channel}.D/{net}.{station}..{channel}.D.{year}.{day_of_year}")
+
+    return paths
+
 
 def load_trace(input_path, trace_params):
     trace_path = get_trace_path(input_path, trace_params)
     trace = None
 
     if not os.path.isfile(trace_path):
-        logger.w(trace_path + " not found")
+        logger.warning(trace_path + " not found")
     else:
         stream = obspy.read(trace_path)
         if len(stream.traces) > 1:
@@ -171,14 +156,20 @@ def load_trace(input_path, trace_params):
 
 
 def load_stream(input_path, trace_params, time_before=60, time_after=60):
-    trace_path = get_trace_path(input_path, trace_params)
     sampling_rate, stream = None, None
     pick_time = trace_params["time"]
 
+    trace_paths = get_three_channels_trace_paths(input_path, trace_params)
+    for trace_path in trace_paths:
         if not os.path.isfile(trace_path):
-            print(trace_path + " not found")
+            logger.warning(trace_path + " not found")
         else:
+            if stream is None:
                 stream = obspy.read(trace_path)
+            else:
+                stream += obspy.read(trace_path)
+
+    if stream is not None:
         stream = stream.slice(pick_time - time_before, pick_time + time_after)
         if len(stream.traces) == 0:
             print(f"no data in: {trace_path}")
@@ -210,13 +201,23 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
             "dimension_order": "CW",
             "component_order": "ZNE",
         }
 
         for i, event in enumerate(events):
             logger.debug(f"Converting {i} event")
             event_params = get_event_params(event)
             event_params["split"] = events_stats.loc[i, "split"]
+
+            picks_per_station = {}
             for pick in event.picks:
-                trace_params = get_trace_params(pick)
+                station = pick.waveform_id.station_code
+                if station in picks_per_station:
+                    picks_per_station[station].append(pick)
+                else:
+                    picks_per_station[station] = [pick]
+
+            for picks in picks_per_station.values():
+
+                trace_params = get_trace_params(picks[0])
                 sampling_rate, stream = load_stream(input_path, trace_params)
                 if stream is None:
                     continue
@@ -229,15 +230,14 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
                 trace_params["trace_sampling_rate_hz"] = sampling_rate
                 trace_params["trace_start_time"] = str(actual_t_start)
 
-                pick_time = obspy.core.utcdatetime.UTCDateTime(trace_params["time"])
+                for pick in picks:
+                    pick_time = obspy.core.utcdatetime.UTCDateTime(pick.time)
                     pick_idx = (pick_time - actual_t_start) * sampling_rate
-
                     trace_params[f"trace_{pick.phase_hint}_arrival_sample"] = int(pick_idx)
 
                 writer.add_trace({**event_params, **trace_params}, data)
 
 
-
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format')
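After the conversion, the dataset can be loaded back with SeisBench to verify that every trace carries three channels and both arrival samples. A short check, assuming the dataset was written to the same hypothetical output/ folder as above and that the catalog's phase hints are P and S:

```python
import seisbench.data as sbd

# Load the converted dataset (metadata.csv + waveforms.hdf5 in one folder).
dataset = sbd.WaveformDataset("output")

# Both phase annotations should appear as per-trace sample indices.
print(dataset.metadata[["trace_P_arrival_sample", "trace_S_arrival_sample"]].head())

# Waveforms come back as (channels, samples) arrays in the declared ZNE order.
waveform = dataset.get_waveforms(0)
print(waveform.shape)  # expected: (3, n_samples)
```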