61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
"""
|
|
This file contains helpers for resolving seisbench datasets by name, loading custom datasets and validating them.
|
|
"""
|
|
import os.path
|
|
|
|
import seisbench.data as sbd
|
|
import logging
|
|
|
|
# Importing this module sets the root logger's threshold to INFO.
logging.getLogger().setLevel(logging.INFO)

# Logger used by the helpers in this file; registered under the name 'data'.
logger = logging.getLogger('data')
|
|
|
|
|
|
def get_dataset_by_name(name):
    """
    Resolve dataset name to class from seisbench.data.

    The lookup uses the builtin ``getattr`` instead of calling
    ``__getattribute__`` on the module object directly, so names that a module
    exposes through a module-level ``__getattr__`` hook are resolved as well.
    The attribute found is returned as is; it is not checked to be a dataset
    class.

    :param name: Name of dataset as defined in seisbench.data.
    :return: Dataset class from seisbench.data
    :raises ValueError: If seisbench.data has no attribute called ``name``.
    """
    try:
        return getattr(sbd, name)
    except AttributeError as err:
        # Keep the failed lookup attached as the explicit cause.
        raise ValueError(f"Unknown dataset '{name}'.") from err
|
|
|
|
|
|
def get_custom_dataset(path):
    """
    Load a custom dataset stored in seisbench format.

    :param path: Location of the dataset; handed unchanged to
                 ``sbd.WaveformDataset``.
    :return: The ``sbd.WaveformDataset`` object built from ``path``
    :raises ValueError: If building the dataset raises ``AttributeError``.
                        Any other exception propagates unchanged.
    """
    try:
        custom_dataset = sbd.WaveformDataset(path)
    except AttributeError:
        raise ValueError(f"Unknown dataset '{path}'.")
    return custom_dataset
|
|
|
|
|
|
def validate_custom_dataset(data_path):
    """
    Validate a custom dataset.

    The dataset passes when ``data_path`` is an existing directory and the
    train, dev and test splits of the ``sbd.WaveformDataset`` loaded from it
    all have non-zero length. On success an INFO message is logged.

    :param data_path: path to the dataset
    :return: None
    :raises ValueError: If ``data_path`` is not an existing directory, or if
                        the training, dev or test split is empty.
    """
    # check if path exists (a path to a regular file is rejected as well)
    if not os.path.isdir(data_path):
        raise ValueError(f"Data path {data_path} does not exist.")

    dataset = sbd.WaveformDataset(data_path)

    # check if the dataset is split into train, dev and test
    split_checks = (
        ("Training", dataset.train),
        ("Dev", dataset.dev),
        ("Test", dataset.test),
    )
    for label, load_split in split_checks:
        # Splits are loaded one at a time, in order, and the first empty one
        # aborts the validation.
        if len(load_split()) == 0:
            raise ValueError(f"{label} set is empty.")

    logger.info("Custom dataset validated successfully.")
|
|
|
|
|