shared-snippets/matlab/csvconverter/csv2gdf.m
2024-08-28 17:22:21 +02:00

88 lines
3.7 KiB
Matlab
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

% -----------------
% Copyright © 2023 ACK Cyfronet AGH, Poland.
% -----------------
function [gdfFileName] = csv2gdf(csvFilePath, column_desc, description, useCsvHeader)
% DESCRIPTION: Program to create GDF files (in Matlab format) from csv file. Performs a reverse action to the
% gdf2csv.m script.
% INPUTS:
%   - csvFilePath : path to text file with csv data. The separator between columns is a comma (',').
%       Description of the most popular GDF file formats can be found at https://docs.cyfronet.pl/display/ISDOC/GDF
%   - column_desc : structure array (one element per csv column) containing information how the gdf should be
%       constructed. Fields read by this function: include, inputTimeGroup, inputTimeFormat, inputType,
%       nameInCatalog, description, format, unit.
%   - description : description written into the file
%   - useCsvHeader : when true, the first csv row is treated as a header and is not converted to data;
%       the flag is also forwarded to readAndCheckHeaders.
% OUTPUT:
%   - gdfFileName : name of the csv file without directory and extension. The GDF is saved as
%       <gdfFileName>.mat, using a relative path (i.e. in the current working directory).

    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);

    % data is addressed below as a flat list of cells, colCount consecutive cells per csv row.
    % With a header row present, the first colCount cells belong to the header and are skipped:
    % the modifier shifts both the number of data rows and the per-row offset accordingly.
    noCsvHeaderModifier = 1;
    if useCsvHeader
        noCsvHeaderModifier = 0;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;

    % Variables below are written to the .mat file by name - do not rename them.
    FormatName = 'GDF';
    FormatVersion = 2.1;
    CRS = 'n/a';
    TimeZone = 'UTC';
    Description = description;
    FieldDescription = {};
    FieldType = {};
    FieldUnit = {};
    d = struct();

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);
    colInGdf = 1; % column number in the generated gdf
    for col = 1 : colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Columns belonging to a time group are merged into the current column: their time formats
        % (here) and their raw values (in the row loop) are joined with '-'.
        % NOTE: '-' must be a single-quoted char; a double-quoted "-" is a string object in MATLAB
        % and would turn the concatenation into a string array instead of one char vector.
        inputTimeGroup = current_col.inputTimeGroup;
        isTimeGroup = ~isempty(inputTimeGroup);
        if isTimeGroup
            timeGroupArray = columnsInTimeGroup(inputTimeGroup);
            for columnInGroup = 2 : length(timeGroupArray)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' ...
                    column_desc(timeGroupArray(columnInGroup)).inputTimeFormat];
            end
        end

        fieldName = current_col.nameInCatalog;
        FieldDescription(colInGdf, 1) = fieldName;
        FieldDescription(colInGdf, 2) = current_col.description;
        FieldType(colInGdf, 1) = fieldName;
        FieldType(colInGdf, 2) = current_col.format;
        FieldUnit(colInGdf, 1) = fieldName;
        FieldUnit(colInGdf, 2) = current_col.unit;

        d.(fieldName) = [];
        for row = 1 : rowCount
            % index of the cell preceding the first cell of this csv row
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            if isTimeGroup
                for columnInGroup = 2 : length(timeGroupArray)
                    rawValue = [rawValue '-' data{rowOffset + timeGroupArray(columnInGroup), 1}];
                end
            end

            if isempty(rawValue)
                % missing value: empty text for text columns, NaN otherwise
                if isText(current_col)
                    d.(fieldName)(row) = {''};
                else
                    d.(fieldName)(row) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                % formats '5a' and '5b' are stored as text: year only / year and month
                if strcmp(current_col.format, '5a')
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    d.(fieldName)(row) = parsedValue;
                end
            end
        end
        colInGdf = colInGdf + 1;
    end

    % Output file is named after the csv file (directory and extension dropped).
    [~, gdfFileName, ~] = fileparts(csvFilePath);
    save(strcat(gdfFileName, '.mat'), 'FormatName', 'FormatVersion', 'CRS', 'TimeZone', 'Description', ...
        'FieldDescription', 'FieldType', 'FieldUnit', 'd', '-v7')
end