Added scripts converting mseeds from Bogdanka to SeisBench format, extended README, modified logging
1134
utils/Transforming mseeds from Bogdanka to Seisbench format.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -88,8 +88,8 @@
 "</div>"
 ],
 "text/plain": [
-" Datetime X Y Depth Mw \n",
-"0 2020-01-01 10:09:42.200 5.582503e+06 5.702646e+06 0.7 2.469231 \\\n",
+" Datetime X Y Depth Mw \\\n",
+"0 2020-01-01 10:09:42.200 5.582503e+06 5.702646e+06 0.7 2.469231 \n",
 "\n",
 " Phases mseed_name \n",
 "0 Pg BRDW 2020-01-01 10:09:44.400, Sg BRDW 2020-... 20200101100941.mseed "
@@ -101,7 +101,7 @@
 }
 ],
 "source": [
-"input_path = str(Path.cwd().parent) + \"/data/igf/\"\n",
+"input_path = str(Path.cwd().parent) + \"/datasets/igf/\"\n",
 "catalog = pd.read_excel(input_path + \"Catalog_20_21.xlsx\", index_col=0)\n",
 "catalog.head(1)"
 ]
@@ -317,7 +317,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"Traces converted: 35784it [00:52, 679.39it/s]\n"
+"Traces converted: 35784it [01:01, 578.58it/s]\n"
 ]
 }
 ],
@@ -339,8 +339,10 @@
 " continue\n",
 " if os.path.exists(input_path + \"mseeds/mseeds_2020/\" + event.mseed_name):\n",
 " mseed_path = input_path + \"mseeds/mseeds_2020/\" + event.mseed_name \n",
-" else:\n",
+" elif os.path.exists(input_path + \"mseeds/mseeds_2021/\" + event.mseed_name):\n",
 " mseed_path = input_path + \"mseeds/mseeds_2021/\" + event.mseed_name \n",
+" else: \n",
+" continue\n",
 " \n",
 " \n",
 " stream = get_mseed(mseed_path)\n",
@@ -374,6 +376,8 @@
 " # trace_params[f\"trace_{pick.phase_hint}_status\"] = pick.evaluation_mode\n",
 " \n",
 " writer.add_trace({**event_params, **trace_params}, data)\n",
+"\n",
+" # break\n",
 " \n",
 " "
 ]
@@ -393,7 +397,25 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"data = sbd.WaveformDataset(output_path, sampling_rate=100)"
+"data = sbd.WaveformDataset(output_path, sampling_rate=100)\n"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 12,
+"id": "33c77509-7aab-4833-a372-16030941395d",
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Unnamed dataset - 35784 traces\n"
+]
+}
+],
+"source": [
+"print(data)"
+]
+},
 {
@@ -406,17 +428,17 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 13,
 "id": "1753f65e-fe5d-4cfa-ab42-ae161ac4a253",
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
-"<matplotlib.lines.Line2D at 0x7f7ed04a8820>"
+"<matplotlib.lines.Line2D at 0x14d6c12d0>"
 ]
 },
-"execution_count": 12,
+"execution_count": 13,
 "metadata": {},
 "output_type": "execute_result"
 },
@@ -449,7 +471,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 14,
 "id": "bf7dae75-c90b-44f8-a51d-44e8abaaa3c3",
 "metadata": {},
 "outputs": [
@@ -472,7 +494,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 15,
 "id": "de82db24-d983-4592-a0eb-f96beecb2f69",
 "metadata": {},
 "outputs": [
@@ -622,29 +644,29 @@
 "</div>"
 ],
 "text/plain": [
-" index source_origin_time source_latitude_deg source_longitude_deg \n",
-"0 0 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \\\n",
+" index source_origin_time source_latitude_deg source_longitude_deg \\\n",
+"0 0 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \n",
 "1 1 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \n",
 "2 2 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \n",
 "3 3 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \n",
 "4 4 2020-01-01 10:09:42.200 5.702646e+06 5.582503e+06 \n",
 "\n",
-" source_depth_km source_magnitude split station_network_code station_code \n",
-"0 0.7 2.469231 train PL BRDW \\\n",
+" source_depth_km source_magnitude split station_network_code station_code \\\n",
+"0 0.7 2.469231 train PL BRDW \n",
 "1 0.7 2.469231 train PL BRDW \n",
 "2 0.7 2.469231 train PL GROD \n",
 "3 0.7 2.469231 train PL GROD \n",
 "4 0.7 2.469231 train PL GUZI \n",
 "\n",
-" trace_channel trace_sampling_rate_hz trace_start_time \n",
-"0 EHE 100.0 2020-01-01T10:09:36.480000Z \\\n",
+" trace_channel trace_sampling_rate_hz trace_start_time \\\n",
+"0 EHE 100.0 2020-01-01T10:09:36.480000Z \n",
 "1 EHE 100.0 2020-01-01T10:09:36.480000Z \n",
 "2 EHE 100.0 2020-01-01T10:09:36.480000Z \n",
 "3 EHE 100.0 2020-01-01T10:09:36.480000Z \n",
 "4 CNE 100.0 2020-01-01T10:09:36.476000Z \n",
 "\n",
-" trace_Pg_arrival_sample trace_name trace_Sg_arrival_sample \n",
-"0 792.0 bucket0$0,:3,:2001 NaN \\\n",
+" trace_Pg_arrival_sample trace_name trace_Sg_arrival_sample \\\n",
+"0 792.0 bucket0$0,:3,:2001 NaN \n",
 "1 NaN bucket0$1,:3,:2001 921.0 \n",
 "2 872.0 bucket0$2,:3,:2001 NaN \n",
 "3 NaN bucket0$3,:3,:2001 1017.0 \n",
@@ -658,7 +680,7 @@
 "4 ZNE "
 ]
 },
-"execution_count": 14,
+"execution_count": 15,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -700,7 +722,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.7"
+"version": "3.10.6"
 }
 },
 "nbformat": 4,
19
utils/convert_data.sh
Normal file
@@ -0,0 +1,19 @@
#!/bin/bash
#SBATCH --job-name=mseeds_to_seisbench
#SBATCH --time=1:00:00
#SBATCH --account=plgeposai22gpu-gpu
#SBATCH --partition plgrid
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=1
#SBATCH --mem=24gb


## activate conda environment
source /net/pr2/projects/plgrid/plggeposai/kmilian/mambaforge/bin/activate
conda activate epos-ai-train

input_path="/net/pr2/projects/plgrid/plggeposai/datasets/bogdanka"
catalog_path="/net/pr2/projects/plgrid/plggeposai/datasets/bogdanka/BOIS_all.xml"
output_path="/net/pr2/projects/plgrid/plggeposai/kmilian/platform-demo-scripts/datasets/bogdanka/seisbench_format"

python mseeds_to_seisbench.py --input_path $input_path --catalog_path $catalog_path --output_path $output_path
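With its `#SBATCH` directives this is a Slurm batch script, so on the cluster it would normally be queued rather than executed directly, e.g. (assuming submission from the utils/ directory, where the relative mseeds_to_seisbench.py path resolves):

sbatch convert_data.sh

The Python converter then logs its progress to output.out in the working directory.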
250
utils/mseeds_to_seisbench.py
Normal file
@@ -0,0 +1,250 @@
import os
import pandas as pd
import glob
from pathlib import Path

import obspy
from obspy.core.event import read_events

import seisbench
import seisbench.data as sbd
import seisbench.util as sbu
import sys
import logging
import argparse


logging.basicConfig(filename="output.out",
                    filemode='a',
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.DEBUG)

logger = logging.getLogger('converter')


def create_traces_catalog(directory, years):
    for year in years:
        year_dir = f"{directory}/{year}"
        files = glob.glob(f"{year_dir}/*")
        traces = []
        for i, f in enumerate(files):
            st = obspy.read(f)

            for tr in st.traces:
                trs = pd.Series({
                    'trace_id': tr.id,
                    'trace_st': tr.meta.starttime,
                    'trace_end': tr.meta.endtime,
                    'stream_fname': f
                })
                traces.append(trs)

        traces_catalog = pd.DataFrame(traces)
        traces_catalog.to_csv("data/bogdanka/traces_catalog.csv", mode="a", index=False)


def split_events(events, input_path):
    logger.info("Splitting available events into train, dev and test sets ...")
    events_stats = pd.DataFrame()
    events_stats.index.name = "event"

    for i, event in enumerate(events):
        # count only picks whose mseed file actually exists
        actual_picks = 0
        for pick in event.picks:
            trace_params = get_trace_params(pick)
            trace_path = get_trace_path(input_path, trace_params)
            if os.path.isfile(trace_path):
                actual_picks += 1

        events_stats.loc[i, "pick_count"] = actual_picks

    events_stats['pick_count_cumsum'] = events_stats.pick_count.cumsum()

    # 70/15/15 split by cumulative pick count
    train_th = 0.7 * events_stats.pick_count_cumsum.values[-1]
    dev_th = 0.85 * events_stats.pick_count_cumsum.values[-1]

    events_stats['split'] = 'test'
    for i, event in events_stats.iterrows():
        if event['pick_count_cumsum'] < train_th:
            events_stats.loc[i, 'split'] = 'train'
        elif event['pick_count_cumsum'] < dev_th:
            events_stats.loc[i, 'split'] = 'dev'
        else:
            break

    return events_stats


def get_event_params(event):
    origin = event.preferred_origin()
    if origin is None:
        return {}

    mag = event.preferred_magnitude()

    source_id = str(event.resource_id)

    event_params = {
        "source_id": source_id,
        "source_origin_uncertainty_sec": origin.time_errors["uncertainty"],
        "source_latitude_deg": origin.latitude,
        "source_latitude_uncertainty_km": origin.latitude_errors["uncertainty"],
        "source_longitude_deg": origin.longitude,
        "source_longitude_uncertainty_km": origin.longitude_errors["uncertainty"],
        "source_depth_km": origin.depth / 1e3,
        "source_depth_uncertainty_km": origin.depth_errors["uncertainty"] / 1e3 if origin.depth_errors[
            "uncertainty"] is not None else None,
    }

    if mag is not None:
        event_params["source_magnitude"] = mag.mag
        event_params["source_magnitude_uncertainty"] = mag.mag_errors["uncertainty"]
        event_params["source_magnitude_type"] = mag.magnitude_type
        event_params["source_magnitude_author"] = mag.creation_info.agency_id if mag.creation_info is not None else None

    return event_params


def get_trace_params(pick):
    net = pick.waveform_id.network_code
    sta = pick.waveform_id.station_code

    trace_params = {
        "station_network_code": net,
        "station_code": sta,
        "trace_channel": pick.waveform_id.channel_code,
        "station_location_code": pick.waveform_id.location_code,
        "time": pick.time
    }

    return trace_params


def find_trace(pick_time, traces):
    for tr in traces:
        if pick_time > tr.stats.endtime:
            continue
        if pick_time >= tr.stats.starttime:
            return tr

    logger.warning(f"no matching trace for pick: {pick_time}")
    return None


def get_trace_path(input_path, trace_params):
    year = trace_params["time"].year
    day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year
    net = trace_params["station_network_code"]
    station = trace_params["station_code"]
    tr_channel = trace_params["trace_channel"]

    path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}"
    return path


def load_trace(input_path, trace_params):
    trace_path = get_trace_path(input_path, trace_params)
    trace = None

    if not os.path.isfile(trace_path):
        logger.warning(trace_path + " not found")
    else:
        stream = obspy.read(trace_path)
        if len(stream.traces) > 1:
            trace = find_trace(trace_params["time"], stream.traces)
        elif len(stream.traces) == 0:
            logger.warning(f"no data in: {trace_path}")
        else:
            trace = stream.traces[0]

    return trace


def load_stream(input_path, trace_params, time_before=60, time_after=60):
    trace_path = get_trace_path(input_path, trace_params)
    sampling_rate, stream = None, None
    pick_time = trace_params["time"]

    if not os.path.isfile(trace_path):
        logger.warning(trace_path + " not found")
    else:
        stream = obspy.read(trace_path)
        stream = stream.slice(pick_time - time_before, pick_time + time_after)
        if len(stream.traces) == 0:
            logger.warning(f"no data in: {trace_path}")
        else:
            sampling_rate = stream.traces[0].stats.sampling_rate

    return sampling_rate, stream


def convert_mseed_to_seisbench_format(input_path, catalog_path, output_path):
    """
    Convert mseed files to the SeisBench dataset format
    :param input_path: folder with mseed files
    :param catalog_path: path to events catalog in quakeml format
    :param output_path: folder to save seisbench dataset
    :return:
    """
    logger.info("Loading events catalog ...")
    events = read_events(catalog_path)
    events_stats = split_events(events, input_path)

    metadata_path = output_path + "/metadata.csv"
    waveforms_path = output_path + "/waveforms.hdf5"

    logger.debug("Catalog loaded, starting conversion ...")

    with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer:
        writer.data_format = {
            "dimension_order": "CW",
            "component_order": "ZNE",
        }
        for i, event in enumerate(events):
            logger.debug(f"Converting event {i}")
            event_params = get_event_params(event)
            event_params["split"] = events_stats.loc[i, "split"]

            for pick in event.picks:
                trace_params = get_trace_params(pick)
                sampling_rate, stream = load_stream(input_path, trace_params)
                if stream is None:
                    continue

                actual_t_start, data, _ = sbu.stream_to_array(
                    stream,
                    component_order=writer.data_format["component_order"],
                )

                trace_params["trace_sampling_rate_hz"] = sampling_rate
                trace_params["trace_start_time"] = str(actual_t_start)

                # convert the pick time to a sample index within the trace
                pick_time = obspy.core.utcdatetime.UTCDateTime(trace_params["time"])
                pick_idx = (pick_time - actual_t_start) * sampling_rate

                trace_params[f"trace_{pick.phase_hint}_arrival_sample"] = int(pick_idx)

                writer.add_trace({**event_params, **trace_params}, data)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Convert mseed files to seisbench format')
    parser.add_argument('--input_path', type=str, help='Path to mseed files')
    parser.add_argument('--catalog_path', type=str, help='Path to events catalog in quakeml format')
    parser.add_argument('--output_path', type=str, help='Path to output files')
    args = parser.parse_args()

    convert_mseed_to_seisbench_format(args.input_path, args.catalog_path, args.output_path)
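Once the converter has run, the resulting dataset can be opened with SeisBench exactly as in the notebook above; a minimal sketch, assuming the --output_path used in convert_data.sh:

import seisbench.data as sbd

# Load the converted dataset; sampling_rate resamples traces to 100 Hz on access.
data = sbd.WaveformDataset("datasets/bogdanka/seisbench_format", sampling_rate=100)
print(data)                        # e.g. "Unnamed dataset - 35784 traces"

# Fetch the first waveform as a (components, samples) array in ZNE order.
waveform = data.get_waveforms(0)
print(waveform.shape)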
230
utils/utils.py
Normal file
@@ -0,0 +1,230 @@
import os
import pandas as pd
import glob
from pathlib import Path

import obspy
from obspy.core.event import read_events

import seisbench.data as sbd
import seisbench.util as sbu
import sys
import logging

logging.basicConfig(filename="output.out",
                    filemode='a',
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.DEBUG)

logger = logging.getLogger('converter')


def create_traces_catalog(directory, years):
    for year in years:
        year_dir = f"{directory}/{year}"
        files = glob.glob(f"{year_dir}/*")
        traces = []
        for i, f in enumerate(files):
            st = obspy.read(f)

            for tr in st.traces:
                trs = pd.Series({
                    'trace_id': tr.id,
                    'trace_st': tr.meta.starttime,
                    'trace_end': tr.meta.endtime,
                    'stream_fname': f
                })
                traces.append(trs)

        traces_catalog = pd.DataFrame(traces)
        traces_catalog.to_csv("data/bogdanka/traces_catalog.csv", mode="a", index=False)


def split_events(events, input_path):
    logger.info("Splitting available events into train, dev and test sets ...")
    events_stats = pd.DataFrame()
    events_stats.index.name = "event"

    for i, event in enumerate(events):
        # count only picks whose mseed file actually exists
        actual_picks = 0
        for pick in event.picks:
            trace_params = get_trace_params(pick)
            trace_path = get_trace_path(input_path, trace_params)
            if os.path.isfile(trace_path):
                actual_picks += 1

        events_stats.loc[i, "pick_count"] = actual_picks

    events_stats['pick_count_cumsum'] = events_stats.pick_count.cumsum()

    # 70/15/15 split by cumulative pick count
    train_th = 0.7 * events_stats.pick_count_cumsum.values[-1]
    dev_th = 0.85 * events_stats.pick_count_cumsum.values[-1]

    events_stats['split'] = 'test'
    for i, event in events_stats.iterrows():
        if event['pick_count_cumsum'] < train_th:
            events_stats.loc[i, 'split'] = 'train'
        elif event['pick_count_cumsum'] < dev_th:
            events_stats.loc[i, 'split'] = 'dev'
        else:
            break

    return events_stats


def get_event_params(event):
    origin = event.preferred_origin()
    if origin is None:
        return {}

    mag = event.preferred_magnitude()

    source_id = str(event.resource_id)

    event_params = {
        "source_id": source_id,
        "source_origin_uncertainty_sec": origin.time_errors["uncertainty"],
        "source_latitude_deg": origin.latitude,
        "source_latitude_uncertainty_km": origin.latitude_errors["uncertainty"],
        "source_longitude_deg": origin.longitude,
        "source_longitude_uncertainty_km": origin.longitude_errors["uncertainty"],
        "source_depth_km": origin.depth / 1e3,
        "source_depth_uncertainty_km": origin.depth_errors["uncertainty"] / 1e3 if origin.depth_errors[
            "uncertainty"] is not None else None,
    }

    if mag is not None:
        event_params["source_magnitude"] = mag.mag
        event_params["source_magnitude_uncertainty"] = mag.mag_errors["uncertainty"]
        event_params["source_magnitude_type"] = mag.magnitude_type
        event_params["source_magnitude_author"] = mag.creation_info.agency_id if mag.creation_info is not None else None

    return event_params


def get_trace_params(pick):
    net = pick.waveform_id.network_code
    sta = pick.waveform_id.station_code

    trace_params = {
        "station_network_code": net,
        "station_code": sta,
        "trace_channel": pick.waveform_id.channel_code,
        "station_location_code": pick.waveform_id.location_code,
        "time": pick.time
    }

    return trace_params


def find_trace(pick_time, traces):
    for tr in traces:
        if pick_time > tr.stats.endtime:
            continue
        if pick_time >= tr.stats.starttime:
            return tr

    logger.warning(f"no matching trace for pick: {pick_time}")
    return None


def get_trace_path(input_path, trace_params):
    year = trace_params["time"].year
    day_of_year = pd.Timestamp(str(trace_params["time"])).day_of_year
    net = trace_params["station_network_code"]
    station = trace_params["station_code"]
    tr_channel = trace_params["trace_channel"]

    path = f"{input_path}/{year}/{net}/{station}/{tr_channel}.D/{net}.{station}..{tr_channel}.D.{year}.{day_of_year}"
    return path


def load_trace(input_path, trace_params):
    trace_path = get_trace_path(input_path, trace_params)
    trace = None

    if not os.path.isfile(trace_path):
        logger.warning(trace_path + " not found")
    else:
        stream = obspy.read(trace_path)
        if len(stream.traces) > 1:
            trace = find_trace(trace_params["time"], stream.traces)
        elif len(stream.traces) == 0:
            logger.warning(f"no data in: {trace_path}")
        else:
            trace = stream.traces[0]

    return trace


def load_stream(input_path, trace_params, time_before=60, time_after=60):
    trace_path = get_trace_path(input_path, trace_params)
    sampling_rate, stream = None, None
    pick_time = trace_params["time"]

    if not os.path.isfile(trace_path):
        logger.warning(trace_path + " not found")
    else:
        stream = obspy.read(trace_path)
        stream = stream.slice(pick_time - time_before, pick_time + time_after)
        if len(stream.traces) == 0:
            logger.warning(f"no data in: {trace_path}")
        else:
            sampling_rate = stream.traces[0].stats.sampling_rate

    return sampling_rate, stream


def convert_mseed_to_seisbench_format():
    input_path = "/net/pr2/projects/plgrid/plggeposai"
    logger.info("Loading events catalog ...")
    events = read_events(input_path + "/BOIS_all.xml")
    events_stats = split_events(events, input_path)
    output_path = input_path + "/seisbench_format"
    metadata_path = output_path + "/metadata.csv"
    waveforms_path = output_path + "/waveforms.hdf5"

    with sbd.WaveformDataWriter(metadata_path, waveforms_path) as writer:
        writer.data_format = {
            "dimension_order": "CW",
            "component_order": "ZNE",
        }
        for i, event in enumerate(events):
            logger.debug(f"Converting event {i}")
            event_params = get_event_params(event)
            event_params["split"] = events_stats.loc[i, "split"]

            for pick in event.picks:
                trace_params = get_trace_params(pick)
                sampling_rate, stream = load_stream(input_path, trace_params)
                if stream is None:
                    continue

                actual_t_start, data, _ = sbu.stream_to_array(
                    stream,
                    component_order=writer.data_format["component_order"],
                )

                trace_params["trace_sampling_rate_hz"] = sampling_rate
                trace_params["trace_start_time"] = str(actual_t_start)

                # convert the pick time to a sample index within the trace
                pick_time = obspy.core.utcdatetime.UTCDateTime(trace_params["time"])
                pick_idx = (pick_time - actual_t_start) * sampling_rate

                trace_params[f"trace_{pick.phase_hint}_arrival_sample"] = int(pick_idx)

                writer.add_trace({**event_params, **trace_params}, data)


if __name__ == "__main__":
    convert_mseed_to_seisbench_format()
    # create_traces_catalog("/net/pr2/projects/plgrid/plggeposai/", ["2018", "2019"])