shared-snippets/matlab/csvconverter/csv2catalog.m
2024-08-28 17:22:21 +02:00

110 lines
4.6 KiB
Matlab
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

function [Catalog] = csv2catalog(csvFilePath, column_desc, idPrefix, sortCatalogByTime, useCsvHeader)
% DESCRIPTION: Program to create the catalogue v2.0x in the
% Matlab format file from csv file.
% INPUTS:
% - csvFilePath : path to text file with csv data. The separator between columns is ','.
%     Description of the most popular catalog fields can be found at https://docs.cyfronet.pl/display/ISDOC/Catalog+-+description
% - column_desc : structure array (one element per CSV column) containing information how the catalog should be created
%     - include: whether the field should be included in the resulting catalog
%     - nameInCsv: name of the column in CSV file
%     - inputType: type of the column in CSV file (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME)
%     - inputTimeFormat: input format for reading time (if 'inputType' is one of time options)
%     - inputTimeGroup: input group for merging dates
%     - nameInCatalog: name of the column that should be inserted into resulting catalog
%     - description: description of the column in the resulting catalog
%     - format: format (display format) of the column in the resulting catalog
%     - unit: unit of the column in the resulting catalog
%     - fieldType: type of the column in the resulting catalog (e.g. Magnitude, Energy)
% - idPrefix : prefix of the ID column if the IDs should be generated (if the catalog doesn't contain
%     the id or the column is not included ('include' is set to false)). When empty, the file name
%     part of csvFilePath (as returned by fileparts) is used instead.
% - sortCatalogByTime : when true, the built catalog is passed through sortByTime before it is returned
% - useCsvHeader : forwarded to readAndCheckHeaders; when true the first row of the read data
%     (presumably the header row) is skipped while filling the catalog values
% OUTPUT:
% - Catalog : structure array with one element per catalog column and the fields
%     field, type, val, unit, description and fieldType
%TODO handle multiple time columns
%TODO the script fails if any of the rows has an empty value at the end (but empty quotes is ok)
    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);

    % 'data' is addressed as a flat list holding colCount cells per row. With a
    % CSV header the first row is skipped (modifier 0); without it, row 1 is the
    % first group of cells (modifier 1).
    noCsvHeaderModifier = 1;
    if useCsvHeader
        noCsvHeaderModifier = 0;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;

    k = 1; % column number in the generated catalog
    if ~contains_id(column_desc)
        % No included ID column: generate IDs of the form <idPrefix>_<row number>.
        if isempty(idPrefix)
            [~, idPrefix] = fileparts(csvFilePath);
        end
        Catalog(k).field = 'ID';
        Catalog(k).type = 3;
        for row = 1 : rowCount
            Catalog(k).val(row, 1) = { strcat(idPrefix, '_', num2str(row,'%04.f')) };
        end
        Catalog(k).unit = [];
        Catalog(k).description = 'Event ID';
        Catalog(k).fieldType = [];
        k = 2;
    end

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);
    for col=1:colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        inputTimeGroup = current_col.inputTimeGroup;
        hasTimeGroup = ~isempty(inputTimeGroup);
        if hasTimeGroup
            % Looked up once per column; the same list is reused for every row below.
            timeGroupArray = columnsInTimeGroup(inputTimeGroup);
            % Append the formats of the remaining group members, separated by '-'.
            % Single quotes keep the result a character vector: a double-quoted "-"
            % is a string scalar in MATLAB and would turn the concatenation into
            % a string array.
            for columnInGroup = 2 : length(timeGroupArray)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' column_desc(timeGroupArray(columnInGroup)).inputTimeFormat];
            end
        end

        Catalog(k).field = current_col.nameInCatalog;
        Catalog(k).type = current_col.format;
        for row = 1 : rowCount
            % Index of the cell preceding the first cell of this row in 'data'.
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            if hasTimeGroup
                % Join the raw values of the group members the same way as the formats.
                for columnInGroup = 2 : length(timeGroupArray)
                    rawValue = [rawValue '-' data{rowOffset + timeGroupArray(columnInGroup), 1}];
                end
            end

            if isempty(rawValue)
                if strcmp(current_col.nameInCatalog, 'ID')
                    error('ID of the event cannot be empty (row: %d)', row)
                elseif isText(current_col)
                    Catalog(k).val(row, 1) = {''};
                else
                    Catalog(k).val(row, 1) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                % Formats '5a' and '5b' store the value as year / year-month text.
                if strcmp(current_col.format, '5a')
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    Catalog(k).val(row, 1) = parsedValue;
                end
            end
        end
        Catalog(k).unit = current_col.unit;
        Catalog(k).description = current_col.description;
        Catalog(k).fieldType = current_col.fieldType;
        k=k+1;
    end

    if sortCatalogByTime
        Catalog = sortByTime(Catalog);
    end
end
function containsId = contains_id(column_desc)
% CONTAINS_ID Check whether the column descriptions provide an included 'ID' column.
% INPUT:
% - column_desc : structure array with (at least) the fields 'nameInCatalog' and 'include'
% OUTPUT:
% - containsId : true when a column whose nameInCatalog equals 'ID' has 'include' set,
%     otherwise false/0 (also for an empty column_desc)
    % Compare the names of ALL columns; the ID column does not have to be the first one.
    idIdxs = find(strcmp({column_desc.nameInCatalog}, 'ID'));
    if isempty(idIdxs)
        containsId = 0;
    else
        % any() keeps the result scalar even if several columns are named 'ID'.
        containsId = any([column_desc(idIdxs).include]);
    end
end