platform-demo-scripts/scripts/data.py
2024-05-13 14:33:19 +02:00

61 lines
1.5 KiB
Python

"""
Utilities for resolving SeisBench dataset classes by name and for loading and validating custom datasets in SeisBench format.
"""
import os.path
import seisbench.data as sbd
import logging
# Importing this module sets the root logger's level to INFO for the whole
# process, so INFO records from the module logger below are not filtered out.
logging.getLogger().setLevel(logging.INFO)

# Module-wide logger, registered under the fixed name "data".
logger = logging.getLogger("data")
def get_dataset_by_name(name):
    """
    Resolve dataset name to class from seisbench.data.

    The name is looked up as an attribute of the ``seisbench.data`` module
    (imported in this file as ``sbd``) and the attribute found is returned
    unchanged.

    :param name: Name of dataset as defined in seisbench.data.
    :return: Dataset class from seisbench.data
    :raises ValueError: If seisbench.data has no attribute called ``name``.
    """
    try:
        # getattr() is the idiomatic form of calling __getattribute__ directly.
        return getattr(sbd, name)
    except AttributeError as err:
        # Chain the original error so the failed lookup stays visible in
        # the traceback as the explicit cause.
        raise ValueError(f"Unknown dataset '{name}'.") from err
def get_custom_dataset(path):
    """
    Return custom dataset in seisbench format.

    :param path: Location of the dataset; passed unchanged to
        ``sbd.WaveformDataset``.
    :return: ``sbd.WaveformDataset`` instance constructed from ``path``
    :raises ValueError: If constructing the dataset raises ``AttributeError``.
    """
    try:
        return sbd.WaveformDataset(path)
    except AttributeError as err:
        # NOTE(review): only AttributeError is translated into ValueError; any
        # other exception raised by the constructor propagates unchanged --
        # confirm that this is the intended set of errors to translate.
        raise ValueError(f"Unknown dataset '{path}'.") from err
def validate_custom_dataset(data_path):
    """
    Validate a custom dataset stored at ``data_path``.

    The path must be an existing directory, and the dataset loaded from it
    must provide non-empty train, dev and test splits. A message is logged at
    INFO level when all checks pass.

    :param data_path: path to the dataset
    :return: None
    :raises ValueError: If ``data_path`` is not an existing directory, or if
        the train, dev or test split is empty.
    """
    # isdir() is also False for a regular file, so such paths are rejected
    # here as well.
    if not os.path.isdir(data_path):
        raise ValueError(f"Data path {data_path} does not exist.")

    dataset = sbd.WaveformDataset(data_path)

    # Check the splits in a fixed order (train, dev, test). Each split is only
    # requested once the previous one has passed, so the first empty split is
    # the one reported.
    split_checks = (
        ("Training", dataset.train),
        ("Dev", dataset.dev),
        ("Test", dataset.test),
    )
    for label, load_split in split_checks:
        if len(load_split()) == 0:
            raise ValueError(f"{label} set is empty.")

    logger.info("Custom dataset validated successfully.")