""" This file contains functionality related to data. """ import os.path import seisbench.data as sbd import logging logging.root.setLevel(logging.INFO) logger = logging.getLogger('data') def get_dataset_by_name(name): """ Resolve dataset name to class from seisbench.data. :param name: Name of dataset as defined in seisbench.data. :return: Dataset class from seisbench.data """ try: return sbd.__getattribute__(name) except AttributeError: raise ValueError(f"Unknown dataset '{name}'.") def get_custom_dataset(path): """ Return custom dataset in seisbench format :param path: :return: Dataset class """ try: return sbd.WaveformDataset(path) except AttributeError: raise ValueError(f"Unknown dataset '{path}'.") def validate_custom_dataset(data_path): """ Validate the dataset :param data_path: path to the dataset :return: """ # check if path exists if not os.path.isdir((data_path)): raise ValueError(f"Data path {data_path} does not exist.") dataset = sbd.WaveformDataset(data_path) # check if the dataset is split into train, dev and test if len(dataset.train()) == 0: raise ValueError(f"Training set is empty.") if len(dataset.dev()) == 0: raise ValueError(f"Dev set is empty.") if len(dataset.test()) == 0: raise ValueError(f"Test set is empty.") logger.info("Custom dataset validated successfully.")