Add code snippets

This commit is contained in:
2024-08-28 17:22:21 +02:00
parent de2368c07e
commit 064a0f75cb
32 changed files with 2354 additions and 0 deletions

View File

@@ -0,0 +1,110 @@
function [Catalog] = csv2catalog(csvFilePath, column_desc, idPrefix, sortCatalogByTime, useCsvHeader)
% DESCRIPTION: Program to create the catalogue v2.0x in the
% Matlab format file from csv file.
% INPUTS:
% - csvFilePath : path to text file with csv data. The separator between columns is a comma (',').
% Description of the most popular catalog fields can be found at https://docs.cyfronet.pl/display/ISDOC/Catalog+-+description
% - column_desc : structure containing information how the catalog should be created
% - include: whether the field should be included in the resulting catalog
% - nameInCsv: name of the column in CSV file
% - inputType: type of the column in CSV file (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME)
% - inputTimeFormat: input format for reading time (if 'inputType' is one of time options)
% - inputTimeGroup: input group for merging dates
% - nameInCatalog: name of the column that should be inserted into resulting catalog
% - description: description of the column in the resulting catalog
% - format: format (display format) of the column in the resulting catalog
% - unit: unit of the column in the resulting catalog
% - fieldType: type of the column in the resulting catalog (e.g. Magnitude, Energy)
% - idPrefix : prefix of the ID column if the IDs should be generated (if the catalog doesn't contain
% the id or the column is not included ('include' is set to false)). When empty, the CSV file
% name (without directory and extension) is used.
% - sortCatalogByTime : when true, the resulting catalog is passed through sortByTime
% - useCsvHeader : when true, the first CSV row is treated as a header row (checked against
% column_desc by readAndCheckHeaders) and is not converted into a catalog row
% OUTPUT:
% - Catalog : structure array with one element per output column and the fields
% field, type, val, unit, description, fieldType
%TODO handle multiple time columns
%TODO the script fails if any of the rows has an empty value at the end (but empty quotes is ok)
    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);

    % 'data' is a flat cell holding the CSV values row after row. With a header row the
    % first colCount entries are skipped (modifier 0), otherwise row 1 starts at entry 1.
    if useCsvHeader
        noCsvHeaderModifier = 0;
    else
        noCsvHeaderModifier = 1;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;

    k = 1; % column number in the generated catalog
    if ~contains_id(column_desc)
        % no usable ID column in the CSV: generate '<prefix>_<row number>' identifiers
        if isempty(idPrefix)
            [~, idPrefix] = fileparts(csvFilePath);
        end
        Catalog(k).field = 'ID';
        Catalog(k).type = 3;
        for row = 1 : rowCount
            Catalog(k).val(row, 1) = { strcat(idPrefix, '_', num2str(row,'%04.f')) };
        end
        Catalog(k).unit = [];
        Catalog(k).description = 'Event ID';
        Catalog(k).fieldType = [];
        k = k + 1;
    end

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);
    for col = 1 : colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Columns sharing the time group of this column are merged into it: both the
        % time formats and the raw values are joined with '-'. The group lookup does not
        % depend on the row, so it is done once per column.
        extraTimeCols = [];
        if ~isempty(current_col.inputTimeGroup)
            timeGroupArray = columnsInTimeGroup(current_col.inputTimeGroup);
            extraTimeCols = timeGroupArray(2:end);
            for g = 1 : numel(extraTimeCols)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' ...
                    column_desc(extraTimeCols(g)).inputTimeFormat];
            end
        end

        Catalog(k).field = current_col.nameInCatalog;
        Catalog(k).type = current_col.format;
        for row = 1 : rowCount
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            % Emptiness has to be decided on the individual parts: once joined with '-'
            % the value is never empty, even when every part of the time group is.
            valueIsEmpty = isempty(rawValue);
            for g = 1 : numel(extraTimeCols)
                groupPart = data{rowOffset + extraTimeCols(g), 1};
                valueIsEmpty = valueIsEmpty && isempty(groupPart);
                rawValue = [rawValue '-' groupPart];
            end

            if valueIsEmpty
                if strcmp(current_col.nameInCatalog, 'ID')
                    error('ID of the event cannot be empty (row: %d)', row)
                elseif isText(current_col)
                    Catalog(k).val(row, 1) = {''};
                else
                    Catalog(k).val(row, 1) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                if strcmp(current_col.format, '5a')
                    % format '5a' keeps only the year, stored as text
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    % format '5b' keeps year and month, stored as text
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    Catalog(k).val(row, 1) = parsedValue;
                end
            end
        end
        Catalog(k).unit = current_col.unit;
        Catalog(k).description = current_col.description;
        Catalog(k).fieldType = current_col.fieldType;
        k = k + 1;
    end

    if sortCatalogByTime
        Catalog = sortByTime(Catalog);
    end
end
function containsId = contains_id(column_desc)
% DESCRIPTION: Checks whether the column description defines an 'ID' catalog column
% that is marked to be included.
% INPUTS:
% - column_desc : structure array with (at least) the fields nameInCatalog and include
% OUTPUT:
% - containsId : true if any column named 'ID' has 'include' set, false otherwise
    % compare the names of ALL columns, the ID does not have to be the first one
    idIdxs = find(strcmp({column_desc.nameInCatalog}, 'ID'));
    if isempty(idIdxs)
        containsId = false;
    else
        containsId = any([column_desc(idIdxs).include]);
    end
end

View File

@@ -0,0 +1,88 @@
% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function [gdfFileName] = csv2gdf(csvFilePath, column_desc, description, useCsvHeader)
% DESCRIPTION: Program to create GDF files (in Matlab format) from csv file. Performs a reverse action to the
% gdf2csv.m script.
% INPUTS:
% - csvFilePath : path to text file with csv data. The separator between columns is a comma (',').
% Description of the most popular GDF file formats can be found at https://docs.cyfronet.pl/display/ISDOC/GDF
% - column_desc : structure containing information how the gdf should be constructed. The fields read here
% are: include, inputType, inputTimeFormat, inputTimeGroup, nameInCatalog, description, format, unit
% - description : description written into the file
% - useCsvHeader : when true, the first CSV row is treated as a header row (checked against
% column_desc by readAndCheckHeaders) and is not converted into a data row
% OUTPUT:
% - gdfFileName : name of the CSV file without directory and extension; the result is saved
% as <gdfFileName>.mat (relative path, i.e. in the current directory)
    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);

    % 'data' is a flat cell holding the CSV values row after row. With a header row the
    % first colCount entries are skipped (modifier 0), otherwise row 1 starts at entry 1.
    if useCsvHeader
        noCsvHeaderModifier = 0;
    else
        noCsvHeaderModifier = 1;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;

    % The variables below are written to the .mat file by name (see the save call).
    FormatName = 'GDF';
    FormatVersion = 2.1;
    CRS = 'n/a';
    TimeZone = 'UTC';
    Description = description;
    FieldDescription = {};
    FieldType = {};
    FieldUnit = {};
    d = struct();

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);
    colInGdf = 1; % column number in the generated gdf
    for col = 1 : colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Columns sharing the time group of this column are merged into it: both the
        % time formats and the raw values are joined with '-'. The group lookup does not
        % depend on the row, so it is done once per column.
        extraTimeCols = [];
        if ~isempty(current_col.inputTimeGroup)
            timeGroupArray = columnsInTimeGroup(current_col.inputTimeGroup);
            extraTimeCols = timeGroupArray(2:end);
            for g = 1 : numel(extraTimeCols)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' ...
                    column_desc(extraTimeCols(g)).inputTimeFormat];
            end
        end

        fieldName = current_col.nameInCatalog;
        % Brace indexing stores each value as the content of the cell, whatever its type
        % or size (including an empty unit or description).
        FieldDescription{colInGdf, 1} = fieldName;
        FieldDescription{colInGdf, 2} = current_col.description;
        FieldType{colInGdf, 1} = fieldName;
        FieldType{colInGdf, 2} = current_col.format;
        FieldUnit{colInGdf, 1} = fieldName;
        FieldUnit{colInGdf, 2} = current_col.unit;

        d.(fieldName) = [];
        for row = 1 : rowCount
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            % Emptiness has to be decided on the individual parts: once joined with '-'
            % the value is never empty, even when every part of the time group is.
            valueIsEmpty = isempty(rawValue);
            for g = 1 : numel(extraTimeCols)
                groupPart = data{rowOffset + extraTimeCols(g), 1};
                valueIsEmpty = valueIsEmpty && isempty(groupPart);
                rawValue = [rawValue '-' groupPart];
            end

            if valueIsEmpty
                if isText(current_col)
                    d.(fieldName)(row) = {''};
                else
                    d.(fieldName)(row) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                if strcmp(current_col.format, '5a')
                    % format '5a' keeps only the year, stored as text
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    % format '5b' keeps year and month, stored as text
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    d.(fieldName)(row) = parsedValue;
                end
            end
        end
        colInGdf = colInGdf + 1;
    end

    [~, gdfFileName, ~] = fileparts(csvFilePath);
    save(strcat(gdfFileName, '.mat'), 'FormatName', 'FormatVersion', 'CRS', 'TimeZone', 'Description', ...
        'FieldDescription', 'FieldType', 'FieldUnit', 'd', '-v7')
end

View File

@@ -0,0 +1,23 @@
% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc)
% DESCRIPTION: Function walking through column_desc and collecting the indexes of columns that share
% the same inputTimeGroup.
% INPUTS:
% - column_desc : structure containing definition of the CSV columns and their mapping to the final object
% OUTPUTS:
% - columnsInTimeGroup : containers.Map from inputTimeGroup to a column vector of the indexes of its
% columns, in the order they appear in column_desc
% - columnsNotToInclude : column vector with the indexes of every column that is NOT the first one
% of its time group
    columnsInTimeGroup = containers.Map();
    columnsNotToInclude = [];
    for colIdx = 1 : length(column_desc)
        groupName = column_desc(colIdx).inputTimeGroup;
        if isempty(groupName)
            % column does not belong to any time group
            continue;
        end
        if isKey(columnsInTimeGroup, groupName)
            % second or later column of a known group: append it and mark it as merged
            columnsInTimeGroup(groupName) = [columnsInTimeGroup(groupName); colIdx];
            columnsNotToInclude = [columnsNotToInclude; colIdx];
        else
            % first column of the group
            columnsInTimeGroup(groupName) = colIdx;
        end
    end
end

View File

@@ -0,0 +1,10 @@
% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function isText = isText(col_desc)
% DESCRIPTION: Function telling whether values of the given column are stored as text
% INPUTS:
% - col_desc : structure describing the column; the fields inputType and format are read
% OUTPUT:
% - isText : true when inputType is 'TEXT' or when format is '5a' or '5b'
    hasTextInput = strcmp(col_desc.inputType, 'TEXT');
    hasFormat5a = strcmp(col_desc.format, '5a');
    hasFormat5b = strcmp(col_desc.format, '5b');
    isText = hasTextInput | hasFormat5a | hasFormat5b;
end

View File

@@ -0,0 +1,35 @@
% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function parsedValue = parseTextValue(rawValue, type, timeFormat)
% DESCRIPTION: Program that parses and returns value read from the text (a cell from a CSV file).
% INPUTS:
% - rawValue : value to parse (text)
% - type : type of the value as defined by CsvColumnContentType.java
% (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME)
% - timeFormat : if the rawValue contains time, this format is used to parse it
% OUTPUT:
% - parsedValue : a real scalar for REAL/INTEGER, a 1x1 cell with the text for TEXT,
% a serial date number (datenum) for the date/time types
% ERRORS: raised when the value cannot be parsed or when the type is unknown
    switch type
        case {'REAL', 'INTEGER'}
            % str2double only parses a single number. str2num is avoided on purpose: it
            % evaluates the text as code, so expressions or commands placed in the CSV
            % would be executed.
            parsedValue = str2double(rawValue);
            % str2double signals a failure with NaN, so a NaN result is accepted only when
            % the text itself spells NaN
            isExplicitNan = ischar(rawValue) && any(strcmpi(strtrim(rawValue), {'nan', '+nan', '-nan'}));
            if ~isreal(parsedValue) || (isnan(parsedValue) && ~isExplicitNan)
                % we checked if the value is empty before parsing (and such value will not be parsed), so a
                % failure here means that the value was in a wrong format and could not be parsed
                error('Cannot parse number input (type: %s): %s', type, rawValue);
            end
        case 'TEXT'
            parsedValue = {rawValue};
        case {'DATE_TIME', 'DATE', 'DATE_DAY', 'DATE_MONTH', 'DATE_YEAR', 'TIME'}
            % every date/time type is parsed with the supplied input format
            try
                parsedValue = datenum(rawValue, timeFormat);
            catch
                error('Invalid input time format specification or CSV content to parse (%s)', rawValue);
            end
        otherwise
            error('Unexpected input column type %s', type);
    end
end

View File

@@ -0,0 +1,33 @@
% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function data = readAndCheckHeaders(csvFilePath, column_desc, doCheckHeaders)
% DESCRIPTION: Program that reads content from the CSV file, checking if the content matches the headers defined
% in the column_desc structure. The returned value is a cell with all values from the csv file.
% INPUTS:
% - csvFilePath : path to the CSV file
% - column_desc : structure containing definition of the CSV columns and their mapping to the final object
% - doCheckHeaders : when true, the first length(column_desc) values are compared with the
% nameInCsv fields of column_desc (see check_headers)
% OUTPUT:
% - data : column cell with all values of the file, row after row (header row included, if present)
% ERRORS: raised when the file cannot be opened, when the headers do not match or when the
% number of values is not a multiple of the number of columns
    fid = fopen(csvFilePath);
    if fid == -1
        error('Cannot open CSV file: %s', csvFilePath);
    end
    try
        % '%q' reads one (optionally double-quoted) text value per field
        parsed = textscan(fid, '%q', 'Delimiter', ',');
    catch readError
        % do not leak the file handle when reading fails
        fclose(fid);
        rethrow(readError);
    end
    fclose(fid);
    data = parsed{1}; % cell with all values from the csv file

    if doCheckHeaders
        check_headers(data, column_desc);
    elseif mod(length(data), length(column_desc)) ~= 0
        % without a header row the shape of the data still has to match the column
        % definition, otherwise values would be silently assigned to wrong columns
        error('Improper number of values in one of the rows');
    end
end
function check_headers(data, column_desc)
% DESCRIPTION: Verifies that the first values of the CSV content are the expected column headers
% and that the content consists of complete rows.
% INPUTS:
% - data : cell with all values read from the CSV file (header row first)
% - column_desc : structure array; nameInCsv of element i is the expected header of column i
% ERRORS: raised when there are fewer values than columns, when a header differs from the
% expected name or when the number of values is not a multiple of the number of columns
    colCount = length(column_desc);
    if length(data) < colCount
        % report a readable error instead of an index-out-of-bounds failure below
        error('Expected %d columns in CSV headers, but found only %d values', colCount, length(data));
    end
    headers = data(1:colCount);
    for i = 1 : colCount
        if ~strcmp(column_desc(i).nameInCsv, headers{i})
            error('Expected column %s, but found %s in CSV headers', column_desc(i).nameInCsv, char(headers{i}));
        end
    end
    if mod(length(data), colCount) ~= 0
        error('Improper number of values in one of the rows');
    end
end