1 Commit

3 changed files with 53 additions and 52 deletions

View File

@@ -1,6 +1,6 @@
{
"dataset_name": "bogdanka",
"data_path": "datasets/bogdanka/seisbench_format/",
"dataset_name": "lumineos",
"data_path": "datasets/lumineos/seisbench_format/",
"targets_path": "datasets/targets",
"models_path": "weights",
"configs_path": "experiments",

View File

@@ -1,30 +1,54 @@
"""
-----------------
Copyright © 2023 ACK Cyfronet AGH, Poland.
This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
-----------------
"""
import os
import pandas as pd
import glob
from pathlib import Path
import obspy
from obspy.core.event import read_events
import seisbench
import seisbench.data as sbd
import seisbench.util as sbu
import sys
import logging
import argparse
logging.basicConfig(filename="output.out",
filemode='a',
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
datefmt='%H:%M:%S',
level=logging.DEBUG)
logger = logging.getLogger('converter')
def create_traces_catalog(directory, years, output_csv="data/bogdanka/traces_catalog.csv"):
    """Build a CSV catalog of all traces found in yearly mseed subdirectories.

    Scans ``directory/<year>/*`` for each requested year, reads every stream
    with obspy and records one row per trace (trace id, start time, end time,
    source file name). Rows are appended to *output_csv*.

    :param directory: root directory containing one subdirectory per year
    :param years: iterable of year values (int or str)
    :param output_csv: path of the catalog CSV to append to
        (default preserves the original hard-coded location)
    """
    rows = []
    for year in years:
        # Bug fix: the original rebound `directory` inside the loop, so the
        # second year globbed "<dir>/<year1>/<year2>". Also glob the files
        # *inside* the year directory — globbing the bare directory path only
        # matches the directory itself.
        for fname in glob.glob(f"{directory}/{year}/*"):
            stream = obspy.read(fname)
            for tr in stream.traces:
                rows.append({
                    "trace_id": tr.id,
                    "trace_st": tr.meta.starttime,
                    "trace_end": tr.meta.endtime,
                    "stream_fname": fname,
                })
    # Bug fix: one dict per row builds a proper table; the original
    # pd.DataFrame(pd.concat(series_list)).transpose() collapsed every
    # trace into a single row.
    traces_catalog = pd.DataFrame(rows)
    # Bug fix: DataFrame.to_csv has no `append` keyword (the original call
    # raised TypeError). Append with mode="a" and write the header only
    # when the file does not exist yet.
    write_header = not os.path.isfile(output_csv)
    traces_catalog.to_csv(output_csv, mode="a", header=write_header, index=False)
def split_events(events, input_path):
logger.info("Splitting available events into train, dev and test sets ...")
@@ -37,7 +61,6 @@ def split_events(events, input_path):
for pick in event.picks:
trace_params = get_trace_params(pick)
trace_path = get_trace_path(input_path, trace_params)
if os.path.isfile(trace_path):
actual_picks += 1
@@ -64,6 +87,7 @@ def get_event_params(event):
origin = event.preferred_origin()
if origin is None:
return {}
# print(origin)
mag = event.preferred_magnitude()
@@ -91,10 +115,12 @@ def get_event_params(event):
def get_trace_params(pick):
net = pick.waveform_id.network_code
sta = pick.waveform_id.station_code
trace_params = {
"station_network_code": pick.waveform_id.network_code,
"station_code": pick.waveform_id.station_code,
"station_network_code": net,
"station_code": sta,
"trace_channel": pick.waveform_id.channel_code,
"station_location_code": pick.waveform_id.location_code,
"time": pick.time
@@ -108,6 +134,7 @@ def find_trace(pick_time, traces):
if pick_time > tr.stats.endtime:
continue
if pick_time >= tr.stats.starttime:
# print(pick_time, " - selected trace: ", tr)
return tr
logger.warning(f"no matching trace for peak: {pick_time}")
@@ -124,25 +151,13 @@ def get_trace_path(input_path, trace_params):
path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}"
return path
def get_three_channels_trace_paths(input_path, trace_params):
    """Return the expected mseed file paths for the EHE, EHN and EHZ channels.

    Builds one SDS-style path per channel from the pick time and station
    identifiers in *trace_params*:
    ``<input>/<year>/<net>/<sta>/<ch>.D/<net>.<sta>..<ch>.D.<year>.<doy>``

    :param input_path: root directory of the mseed archive
    :param trace_params: dict with keys "time", "station_network_code"
        and "station_code"
    :return: list of three path strings, in EHE/EHN/EHZ order
    """
    pick_time = trace_params["time"]
    year = pick_time.year
    day_of_year = pd.Timestamp(str(pick_time)).day_of_year
    net = trace_params["station_network_code"]
    sta = trace_params["station_code"]
    return [
        f"{input_path}/{year}/{net}/{sta}/{channel}.D/{net}.{sta}..{channel}.D.{year}.{day_of_year}"
        for channel in ("EHE", "EHN", "EHZ")
    ]
def load_trace(input_path, trace_params):
trace_path = get_trace_path(input_path, trace_params)
trace = None
if not os.path.isfile(trace_path):
logger.warning(trace_path + " not found")
logger.w(trace_path + " not found")
else:
stream = obspy.read(trace_path)
if len(stream.traces) > 1:
@@ -156,20 +171,14 @@ def load_trace(input_path, trace_params):
def load_stream(input_path, trace_params, time_before=60, time_after=60):
trace_path = get_trace_path(input_path, trace_params)
sampling_rate, stream = None, None
pick_time = trace_params["time"]
trace_paths = get_three_channels_trace_paths(input_path, trace_params)
for trace_path in trace_paths:
if not os.path.isfile(trace_path):
logger.warning(trace_path + " not found")
else:
if stream is None:
stream = obspy.read(trace_path)
else:
stream += obspy.read(trace_path)
if stream is not None:
if not os.path.isfile(trace_path):
print(trace_path + " not found")
else:
stream = obspy.read(trace_path)
stream = stream.slice(pick_time - time_before, pick_time + time_after)
if len(stream.traces) == 0:
print(f"no data in: {trace_path}")
@@ -201,23 +210,13 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
"dimension_order": "CW",
"component_order": "ZNE",
}
for i, event in enumerate(events):
logger.debug(f"Converting {i} event")
event_params = get_event_params(event)
event_params["split"] = events_stats.loc[i, "split"]
picks_per_station = {}
for pick in event.picks:
station = pick.waveform_id.station_code
if station in picks_per_station:
picks_per_station[station].append(pick)
else:
picks_per_station[station] = [pick]
for picks in picks_per_station.values():
trace_params = get_trace_params(picks[0])
trace_params = get_trace_params(pick)
sampling_rate, stream = load_stream(input_path, trace_params)
if stream is None:
continue
@@ -230,14 +229,15 @@ def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
trace_params["trace_sampling_rate_hz"] = sampling_rate
trace_params["trace_start_time"] = str(actual_t_start)
for pick in picks:
pick_time = obspy.core.utcdatetime.UTCDateTime(pick.time)
pick_idx = (pick_time - actual_t_start) * sampling_rate
trace_params[f"trace_{pick.phase_hint}_arrival_sample"] = int(pick_idx)
pick_time = obspy.core.utcdatetime.UTCDateTime(trace_params["time"])
pick_idx = (pick_time - actual_t_start) * sampling_rate
trace_params[f"trace_{pick.phase_hint}_arrival_sample"] = int(pick_idx)
writer.add_trace({**event_params, **trace_params}, data)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format')

View File

@@ -16,4 +16,5 @@ python -c "import torch; print('Number of CUDA devices:', torch.cuda.device_coun
python -c "import torch; print('Name of GPU:', torch.cuda.get_device_name(torch.cuda.current_device()))"
python pipeline.py --dataset "lumineos"
python pipeline.py --dataset "bogdanka"