Add PhaseAssociator code

2024-12-08 22:57:01 +09:00
parent 092335b36d
commit 686626108e
6 changed files with 441 additions and 0 deletions
--- a/PyOctoPhaseAssociation.py
+++ b/PyOctoPhaseAssociation.py
@@ -0,0 +1,114 @@
+import logging
+import sys
+
+import pandas as pd
+
+from obspy import read_inventory, Catalog
+from obspy.core.inventory import Inventory
+
+from pyoctoAssociator import PhaseAssociation
+from catalogConverter import CatalogConverter
+from qml2picks import qml2picks
+from count_picks import count_picks_from_stations
+
+
+def main(qml_picks, inventory, velocity_model, lat_min, lat_max, lon_min, lon_max, z_min,
+         z_max, grid_hor_extent, grid_spacing, p_vel_const, s_vel_const,
+         n_picks=4, n_p_picks=0, n_s_picks=0, n_p_s_picks=0, time_before=10.0, tolerance=1.0, assoc_cut_dist=250.0,
+         pick_match_tolerance=0.5, p_hint="P", s_hint="S"):
+    """
+    Main function for application: PHASEASSOCIATIONAPP
+    Arguments:
+        velocity_model: input file of type 'mat'
+        qml_picks: input file of type 'quakeml_seismogram_picks'
+        inventory: input file of type 'inventory'
+        grid_hor_extent: parameter of type 'DOUBLE'
+        grid_spacing: parameter of type 'DOUBLE'
+        lat_min: parameter of type 'DOUBLE'
+        lat_max: parameter of type 'DOUBLE'
+        lon_min: parameter of type 'DOUBLE'
+        lon_max: parameter of type 'DOUBLE'
+        z_min: parameter of type 'DOUBLE'
+        z_max: parameter of type 'DOUBLE'
+        n_picks: parameter of type 'INTEGER'
+        n_p_picks: parameter of type 'INTEGER'
+        n_s_picks: parameter of type 'INTEGER'
+        n_p_s_picks: parameter of type 'INTEGER'
+        pick_match_tolerance: parameter of type 'DOUBLE'
+        p_vel_const: parameter of type 'DOUBLE'
+        s_vel_const: parameter of type 'DOUBLE'
+        p_hint: parameter of type 'STRING'
+        s_hint: parameter of type 'STRING'
+        time_before: parameter of type 'DOUBLE'
+        tolerance: parameter of type 'DOUBLE'
+        assoc_cut_dist: parameter of type 'DOUBLE'
+    Returns:
+        File(s) named 'out_catalog' of type 'data_catalog_csv' and format 'PLAIN_TEXT' from working directory
+    """
+
+    inv = Inventory()
+    for inventory_item in inventory:
+        inv.extend(read_inventory(inventory_item))
+
+    if count_picks_from_stations(quakeml_file=qml_picks, inventory=inv) == 0:
+        logging.error(f"There are no stations in the Station Inventory file(s) corresponding to the picked times. "
+                      f"Check if the file(s) is/are correct")
+        sys.exit(1)
+
+    #convert input velocity from m/s to km/s
+    if p_vel_const is not None:
+        p_vel_const = p_vel_const / 1000
+    if s_vel_const is not None:
+        s_vel_const = s_vel_const / 1000
+
+    config_pyocto = {
+        "velocity": {
+            "path": velocity_model,
+            "path_out": "velocity_model_1D",
+            "0D_P_vel": p_vel_const,
+            "0D_S_vel": s_vel_const,
+            "grid_hor_extent": grid_hor_extent,
+            "grid_ver_extent": z_max-z_min,
+            "grid_spacing": grid_spacing
+        },
+        "grid": {
+            "lat_min": lat_min,
+            "lat_max": lat_max,
+            "lon_min": lon_min,
+            "lon_max": lon_max,
+            "z_min": z_min,
+            "z_max": z_max
+        },
+        "params": {
+            "time_before": time_before,
+            "n_picks": n_picks,
+            "n_p_picks": n_p_picks,
+            "n_s_picks": n_s_picks,
+            "n_p_and_s_picks": n_p_s_picks,
+            "tolerance": tolerance,
+            "association_cutoff_distance": assoc_cut_dist,
+            "pick_match_tolerance": pick_match_tolerance
+        }
+    }
+
+    config_conv = {
+        "P_hint": p_hint,
+        "S_hint": s_hint,
+    }
+    picks = qml2picks(qml_picks, p_hint, s_hint)
+    if not picks:
+        # Saving empty qml file
+        cat = Catalog()
+        cat.write("detections.xml", format="QUAKEML")
+    else:
+        pa = PhaseAssociation(inventory=inv, config=config_pyocto)
+        events, assignments = pa.associate_phases(picks)
+        if not events.empty:
+            events['datetime'] = pd.to_datetime(events['time'], unit='s')
+            cc = CatalogConverter(config_conv, picks, events, assignments, "PyOcto")
+            cc.catalog2obspy()
+            cc.save_catalog_to_file("detections.xml")
+        else:
+            # Saving empty qml file
+            cat = Catalog()
+            cat.write("detections.xml", format="QUAKEML")
--- a/catalogConverter.py
+++ b/catalogConverter.py
@@ -0,0 +1,120 @@
+import pandas as pd
+
+from obspy.core.event import Catalog
+from obspy.core.event import Event
+from obspy.core.event import Pick
+from obspy.core.event import Origin
+from obspy.core.event import OriginQuality
+
+from obspy import UTCDateTime
+from obspy.core.event.base import WaveformStreamID, Comment
+
+
+class CatalogConverter:
+    """
+    Class for converting SeisBench and pyOcto detection results to ObsPy Catalog, which can be saved as QuakeML.
+    """
+    def __init__(self, config, picks, catalog_df, assignments_df, name_of_algorithm):
+        self._catalog_df = catalog_df
+        self._assignments_df = assignments_df
+        self._picks = picks
+        self._config = config
+        self._name_of_algorithm = name_of_algorithm
+        self.catalog = None
+
+    def _convert_pick(self, p):
+        """
+        Function converts picks from SeisBench to ObsPy format
+        :param p: SeisBench pick
+        :return: ObsPy pick
+        """
+        pick = Pick()
+        pick.time = UTCDateTime(p.peak_time.datetime)
+        pick.waveform_id = WaveformStreamID(network_code=p.trace_id.split(".")[0],
+                                            station_code=p.trace_id.split(".")[1],
+                                            channel_code=p.trace_id.split(".")[2])
+        if p.phase == 'P':
+            pick.phase_hint = self._config["P_hint"]
+        elif p.phase == 'S':
+            pick.phase_hint = self._config["S_hint"]
+        pick.evaluation_mode = 'automatic'
+        pick.evaluation_status = 'preliminary'
+        return pick
+
+    def _convert_origin(self, origin_sb, list_of_picks_sb):
+        origin = Origin()
+        origin.time = UTCDateTime(pd.to_datetime(origin_sb.time, unit='s').to_pydatetime())
+        origin.latitude = origin_sb.latitude  # float
+        origin.longitude = origin_sb.longitude  # float
+        origin.depth = origin_sb.depth  # float in kilometers (SWIP5 origin version) down the see level
+        origin.depth_type = 'operator assigned'
+        # TODO: make sure that region is not necessary
+        # origin.region = self._config["region"]
+        origin.evaluation_mode = "automatic"
+        origin.evaluation_status = 'preliminary'
+        origin.comments.append(Comment(text=f"Localized by: {self._name_of_algorithm}", force_resource_id=False))
+        origin.quality = OriginQuality(used_phase_count=len(list_of_picks_sb))
+        return origin
+
+    def _convert_event(self, origin_sb, list_of_picks_sb):
+        """
+        Function convert GaMMa detection to ObsPy Event
+        :param origin_sb:
+        :param list_of_picks_sb:
+        :return:
+        """
+        event = Event()
+        for p in list_of_picks_sb:
+            pick = self._convert_pick(p)
+            event.picks.append(pick)
+        origin = self._convert_origin(origin_sb, list_of_picks_sb)
+        event.origins.append(origin)
+        return event
+
+    @staticmethod
+    def _append_pick_trace_id(pick, stream):
+        """
+        Function assigns channel to pick - it is useful for work with SWIP
+        :param pick:
+        :param stream:
+        :return:
+        """
+        channel = stream[0].stats.channel
+        if pick.phase == "P":
+            pick.trace_id = pick.trace_id + channel[:-1] + "Z"
+        if pick.phase == "S":
+            pick.trace_id = pick.trace_id + channel[:-1] + "E"
+        return pick
+
+    def catalog2obspy(self):
+        """
+        Function convert GaMMa catalog and SeisBench picks
+        :return: ObsPy Catalog object
+        """
+        # TODO: make sure that resource id is necessary
+        #cat = Catalog(resource_id=self._config["resource_id"])
+        cat = Catalog()
+        for j, row in self._catalog_df.iterrows():
+            event = self._catalog_df.iloc[j]
+            event_picks = [self._picks[i] for i in
+                           self._assignments_df[self._assignments_df["event_idx"] ==
+                                                event["idx"]]["pick_idx"]]
+            event_obspy = self._convert_event(event, event_picks)
+            cat.append(event_obspy)
+        self.catalog = cat
+
+    def save_catalog_to_file(self, file_path):
+        """
+        Save ObsPy catalog to a file.
+
+        Args:
+            file_path (str): The file path where the catalog will be saved.
+
+        Returns:
+            None
+        """
+        try:
+            self.catalog.write(file_path, format="QUAKEML")
+            print(f"Catalog saved successfully to {file_path}")
+        except Exception as e:
+            print(f"Error occurred while saving catalog: {e}")
--- a/count_picks.py
+++ b/count_picks.py
@@ -0,0 +1,30 @@
+import logging
+import sys
+
+from obspy import read_inventory, read_events
+
+
+def count_picks_from_stations(quakeml_file, inventory):
+
+    # Read the QuakeML file
+    try:
+        events = read_events(quakeml_file)
+    except Exception as e:
+        logging.error(f"Error reading QuakeML file: {e}")
+        sys.exit(1)
+
+    # Extract station network and station codes from the inventory
+    stations = set()
+    for network in inventory:
+        for station in network:
+            stations.add((network.code, station.code))
+
+    # Count the number of picks in the QuakeML file that come from the stations in the inventory
+    pick_count = 0
+    for event in events:
+        for pick in event.picks:
+            net_sta = (pick.waveform_id.network_code, pick.waveform_id.station_code)
+            if net_sta in stations:
+                pick_count += 1
+
+    return pick_count
--- a/phase_associator_wrapper.py
+++ b/phase_associator_wrapper.py
@@ -0,0 +1,59 @@
+#  -----------------
+#  Copyright © 2024 ACK Cyfronet AGH, Poland.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+#  This work was partially funded by DT-GEO Project.
+#  -----------------
+
+import sys
+import argparse
+
+from PyOctoPhaseAssociation import main as phaseassoc
+
+
+def main(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("qml_picks", help="Path to input file of type 'quakeml_seismogram_picks'")
+    parser.add_argument("inventory", nargs='+', help="Paths to input files of type 'inventory'")
+
+    parser.add_argument("--velocity_model", help="Path to input file of type 'velocity_model'", type=str,
+                        required=False)
+    parser.add_argument("--lat_min", type=float)
+    parser.add_argument("--lat_max", type=float)
+    parser.add_argument("--lon_min", type=float)
+    parser.add_argument("--lon_max", type=float)
+    parser.add_argument("--z_min", type=float)
+    parser.add_argument("--z_max", type=float)
+    parser.add_argument("--grid_hor_extent", type=float, default=None, required=False)
+    parser.add_argument("--grid_spacing", type=float, default=None, required=False)
+    parser.add_argument("--p_vel_const", type=float, default=None, required=False)
+    parser.add_argument("--s_vel_const", type=float, default=None, required=False)
+    parser.add_argument("--n_picks", type=int, default=4, required=False)
+    parser.add_argument("--n_p_picks", type=int, default=0, required=False)
+    parser.add_argument("--n_s_picks", type=int, default=0, required=False)
+    parser.add_argument("--n_p_s_picks", type=int, default=0, required=False)
+    parser.add_argument("--time_before", type=float, default=10, required=False)
+    parser.add_argument("--tolerance", type=float, default=1, required=False)
+    parser.add_argument("--assoc_cut_dist", type=float, default=250, required=False)
+    parser.add_argument("--pick_match_tolerance", type=float, default=0.5, required=False)
+    parser.add_argument("--p_hint", type=str, default="P", required=False)
+    parser.add_argument("--s_hint", type=str, default="S", required=False)
+
+    args = parser.parse_args()
+    phaseassoc(**vars(args))
+    return
+
+
+if __name__ == '__main__':
+    main(sys.argv)
--- a/pyoctoAssociator.py
+++ b/pyoctoAssociator.py
@@ -0,0 +1,100 @@
+import os
+
+import numpy as np
+import pandas as pd
+import scipy.io
+
+import pyocto
+
+
+class PhaseAssociation:
+    """
+    Class for phase association using PyOcto.
+    """
+
+    def __init__(self, inventory, config):
+        """
+        Initialize the PhaseAssociation class.
+
+        Args:
+            inventory: ObsPy inventory object.
+            config (dict): Configuration parameters.
+        """
+        self._inv = inventory
+        self._config = config
+        self._setup_velocity_model()
+        self._setup_associator()
+
+    def _setup_velocity_model(self):
+        """
+        Set up the velocity model based on configuration.
+        """
+        velocity_config = self._config["velocity"]
+        if self._config["velocity"]["path"] is None:
+            self.velocity_model = pyocto.VelocityModel0D(
+                p_velocity=self._config["velocity"]["0D_P_vel"],
+                s_velocity=self._config["velocity"]["0D_S_vel"],
+                tolerance=self._config["params"]["tolerance"],
+                association_cutoff_distance=self._config["params"]["association_cutoff_distance"])
+        else:
+            if not os.path.exists(velocity_config["path_out"]):
+                self._create_1d_vel_model()
+            self.velocity_model = pyocto.VelocityModel1D(velocity_config["path_out"],
+                                                         tolerance=self._config["params"]["tolerance"])
+
+    def _mat2df(self):
+        # Load the .mat file
+        mat_contents = scipy.io.loadmat(self._config["velocity"]["path"])
+        data = mat_contents["d"][0][0]
+        depth = data[0].T[0]
+        depth = np.append(depth[::2], depth[-1])
+        vel_p = data[1].T[0]
+        vel_p = np.append(vel_p[0], vel_p[::2])
+        vel_s = data[2].T[0]
+        vel_s = np.append(vel_s[0], vel_s[::2])
+
+        # Create DataFrame from the lists
+        data = {
+            'depth': depth,
+            'vp': vel_p,
+            'vs': vel_s
+        }
+        layers = pd.DataFrame(data)
+
+        return layers
+
+    def _create_1d_vel_model(self):
+
+        layers = self._mat2df()
+        pyocto.VelocityModel1D.create_model(
+            layers,
+            self._config["velocity"]["grid_spacing"],
+            self._config["velocity"]["grid_hor_extent"],
+            self._config["velocity"]["grid_ver_extent"],
+            self._config["velocity"]["path_out"]
+        )
+
+    def _setup_associator(self):
+        """
+        Set up the associator based on configuration.
+        """
+        grid_config = self._config["grid"]
+        associator_params = self._config["params"]
+        self.associator = pyocto.OctoAssociator.from_area(
+            lat=(grid_config["lat_min"], grid_config["lat_max"]),
+            lon=(grid_config["lon_min"], grid_config["lon_max"]),
+            zlim=(grid_config["z_min"], grid_config["z_max"]),
+            time_before=associator_params["time_before"],
+            velocity_model=self.velocity_model,
+            n_picks=associator_params["n_picks"],
+            n_p_picks=associator_params["n_p_picks"],
+            n_s_picks=associator_params["n_s_picks"],
+            n_p_and_s_picks=associator_params["n_p_and_s_picks"],
+            pick_match_tolerance=associator_params["pick_match_tolerance"]
+        )
+
+    def associate_phases(self, picks):
+        stations = self.associator.inventory_to_df(self._inv)
+        events, assignments = self.associator.associate_seisbench(picks, stations)
+        events = self.associator.transform_events(events)
+        return events, assignments
--- a/qml2picks.py
+++ b/qml2picks.py
@@ -0,0 +1,18 @@
+from obspy import read_events
+from seisbench.util import Pick as SBPick
+
+
+def qml2picks(path_to_qml_picks, p_hint, s_hint):
+    c = read_events(path_to_qml_picks, format="QUAKEML")
+    picks = list()
+    for p in c.events[0].picks:
+        # PyOcto requires one character "P" and "S" hints.
+        if p.phase_hint == p_hint:
+            picks.append(
+                SBPick(trace_id=f"{p.waveform_id.network_code}.{p.waveform_id.station_code}.", start_time=p.time,
+                       peak_time=p.time, end_time=p.time, phase="P"))
+        elif p.phase_hint == s_hint:
+            picks.append(
+                SBPick(trace_id=f"{p.waveform_id.network_code}.{p.waveform_id.station_code}.", start_time=p.time,
+                       peak_time=p.time, end_time=p.time, phase="S"))
+    return picks