% Forked from episodes-platform/shared-snippets (Matlab source, originally 110 lines, 4.6 KiB).
function [Catalog] = csv2catalog(csvFilePath, column_desc, idPrefix, sortCatalogByTime, useCsvHeader)
% DESCRIPTION: Program to create the catalogue v2.0x in the
% Matlab format file from csv file.
% INPUTS:
%   - csvFilePath : path to text file with csv data. The separator between columns is defined by ','.
%       Description of the most popular catalog fields can be found at https://docs.cyfronet.pl/display/ISDOC/Catalog+-+description
%   - column_desc : structure array (one element per CSV column) containing information how the catalog should be created
%       - include: whether the field should be included in the resulting catalog
%       - nameInCsv: name of the column in CSV file
%       - inputType: type of the column in CSV file (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME)
%       - inputTimeFormat: input format for reading time (if 'inputType' is one of time options)
%       - inputTimeGroup: input group for merging dates
%       - nameInCatalog: name of the column that should be inserted into resulting catalog
%       - description: description of the column in the resulting catalog
%       - format: format (display format) of the column in the resulting catalog
%       - unit: unit of the column in the resulting catalog
%       - fieldType: type of the column in the resulting catalog (e.g. Magnitude, Energy)
%   - idPrefix : prefix of the ID column if the IDs should be generated (if the catalog doesn't contain
%       the id or the column is not included ('include' is set to false)). When empty, the
%       name of the CSV file (without directory and extension) is used instead.
%   - sortCatalogByTime : when true, the resulting catalog is passed through sortByTime
%   - useCsvHeader : forwarded to readAndCheckHeaders; when true, the first row of the
%       read data is treated as a header row and skipped
% OUTPUT:
%   - Catalog : structure array with fields field, type, val, unit, description, fieldType
%       (one element per generated catalog column)

%TODO handle multiple time columns
%TODO the script fails if any of the rows has an empty value at the end (but empty quotes is ok)

    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);

    % 'data' is indexed as one flat list of cells, colCount cells per row.
    % With a header row the first colCount cells are skipped (modifier 0),
    % without it the data start at the very first cell (modifier 1).
    noCsvHeaderModifier = 1;
    if useCsvHeader
        noCsvHeaderModifier = 0;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;
    k = 1; % column number in the generated catalog

    % Generate sequential IDs when the description provides no included ID column.
    if ~contains_id(column_desc)
        if isempty(idPrefix)
            [~, idPrefix] = fileparts(csvFilePath);
        end
        Catalog(k).field = 'ID';
        Catalog(k).type = 3;
        Catalog(k).val = []; % make sure the field exists even when there are no rows
        for row = 1 : rowCount
            Catalog(k).val(row, 1) = { strcat(idPrefix, '_', num2str(row, '%04.f')) };
        end
        Catalog(k).unit = [];
        Catalog(k).description = 'Event ID';
        Catalog(k).fieldType = [];
        k = k + 1;
    end

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);

    for col = 1 : colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Columns that belong to a time group are merged into this column:
        % both the time formats and (below) the raw values are joined with '-'.
        inputTimeGroup = current_col.inputTimeGroup;
        isGrouped = ~isempty(inputTimeGroup);
        timeGroupArray = [];
        if isGrouped
            % The group lookup does not depend on the row, so do it once per column.
            timeGroupArray = columnsInTimeGroup(inputTimeGroup);
            for columnInGroup = 2 : length(timeGroupArray)
                % Single-quoted '-' keeps the result a char vector; a double-quoted
                % "-" would turn the concatenation into a string array in MATLAB.
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' ...
                    column_desc(timeGroupArray(columnInGroup)).inputTimeFormat];
            end
        end

        Catalog(k).field = current_col.nameInCatalog;
        Catalog(k).type = current_col.format;
        Catalog(k).val = []; % make sure the field exists even when there are no rows
        for row = 1 : rowCount
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            if isGrouped
                for columnInGroup = 2 : length(timeGroupArray)
                    rawValue = [rawValue '-' data{rowOffset + timeGroupArray(columnInGroup), 1}];
                end
            end

            if isempty(rawValue)
                % Empty cells: IDs are mandatory, text becomes '', everything else NaN.
                if strcmp(current_col.nameInCatalog, 'ID')
                    error('ID of the event cannot be empty (row: %d)', row)
                elseif isText(current_col)
                    Catalog(k).val(row, 1) = {''};
                else
                    Catalog(k).val(row, 1) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                if strcmp(current_col.format, '5a')
                    % Format '5a' is stored as a year-only string.
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    % Format '5b' is stored as a year-month string.
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    Catalog(k).val(row, 1) = parsedValue;
                end
            end
        end
        Catalog(k).unit = current_col.unit;
        Catalog(k).description = current_col.description;
        Catalog(k).fieldType = current_col.fieldType;
        k = k + 1;
    end

    if sortCatalogByTime
        Catalog = sortByTime(Catalog);
    end
end

function containsId = contains_id(column_desc)
% CONTAINS_ID  Check whether the column description provides an included ID column.
%   INPUT:
%     - column_desc : structure array with (at least) the fields
%         'nameInCatalog' and 'include', one element per CSV column
%   OUTPUT:
%     - containsId : true when any column whose 'nameInCatalog' equals 'ID'
%         has 'include' set; false otherwise (also for an empty description)

    % Compare the names of ALL columns, not only the first one, so that an
    % ID column placed anywhere in the CSV is recognised.
    idIdxs = find(strcmp({column_desc.nameInCatalog}, 'ID'));
    if isempty(idIdxs)
        containsId = false;
    else
        % Several columns may be named 'ID'; one included column is enough.
        containsId = any([column_desc(idIdxs).include]);
    end
end