Add matlab/export folder

This commit is contained in:
Mieszko Makuch 2024-08-28 12:59:02 +02:00
parent 3cc9f98972
commit 209f7b68d5
6 changed files with 324 additions and 0 deletions

View File

@ -0,0 +1,56 @@
% -----------------
% Copyright © 2022 ACK Cyfronet AGH, Poland.
% -----------------
function catalog2csv(catalog, csvFileName)
% CATALOG2CSV Write a catalog struct array to a CSV file.
%   catalog     - struct array; every element describes one column and is
%                 read through its 'field' (column name), 'type' (field
%                 type) and 'val' (column values) fields
%   csvFileName - path of the CSV file to create (opened in 'w+' mode)
%
%   The first line contains the comma-separated column names, each following
%   line one catalog row printed with the format built by getLineFormat.
%   Raises 'catalog2csv:cannotOpenFile' when the file cannot be opened.
    fieldNames = getFieldNames(catalog);
    fieldTypes = getFieldTypes(catalog);
    lineFormat = getLineFormat(fieldTypes);
    M = prepareCellMatrix(catalog);
    [fid, errMsg] = fopen(csvFileName, 'w+');
    if fid == -1
        error('catalog2csv:cannotOpenFile', 'Cannot open file %s for writing: %s', csvFileName, errMsg);
    end
    % release the file handle also when one of the fprintf calls below fails
    closeFile = onCleanup(@() fclose(fid));
    fprintf(fid, '%s\n', strjoin(fieldNames', ','));
    for k = 1:size(M, 1)
        fprintf(fid, lineFormat, M{k, :});
    end
end
function fieldNames = getFieldNames(catalog)
% Collect the 'field' value of every catalog entry into a 1-by-N cell array.
% The result is a cell array also for an empty catalog (previously a numeric
% [] was returned in that case, which cell-based consumers cannot handle).
    fieldNames = cell(1, length(catalog));
    for i = 1:length(catalog)
        fieldNames{i} = catalog(i).field;
    end
end
function fieldTypes = getFieldTypes(catalog)
% Collect the 'type' value of every catalog entry into a 1-by-N cell array.
% The result is a cell array also for an empty catalog (previously a numeric
% [] was returned in that case, which cell-based consumers cannot handle).
    fieldTypes = cell(1, length(catalog));
    for i = 1:length(catalog)
        fieldTypes{i} = catalog(i).type;
    end
end
function M = prepareCellMatrix(catalog)
% Build a cell matrix with one column per catalog entry.
% Cell values are copied as they are, values of a time type are converted to
% text by formatCatalogTime and all remaining values are split with num2cell.
    M = {};
    for col = 1:length(catalog)
        entry = catalog(col);
        if iscell(entry.val)
            column = entry.val;
        elseif isTime(entry.type)
            column = formatCatalogTime(entry.val, entry.type);
        else
            column = num2cell(entry.val);
        end
        M(:, col) = column;
    end
end
function timeStrVector = formatCatalogTime(timeVector, fieldType)
% Convert date numbers to a column cell array of 'yyyy-mm-dd HH:MM:SS.FFF'
% strings; NaN entries are written as the text 'NaN'.
% A cell input is flattened with cell2mat first. fieldType is not used here.
    if iscell(timeVector)
        timeVector = cell2mat(timeVector);
    end
    isMissing = isnan(timeVector);
    isPresent = ~isMissing;
    formatted = cellstr(datestr(timeVector(isPresent), 'yyyy-mm-dd HH:MM:SS.FFF'));
    timeStrVector(isPresent, 1) = formatted;
    timeStrVector(isMissing, 1) = { 'NaN' };
end

View File

@ -0,0 +1,16 @@
%
% -----------------
% Copyright © 2020 ACK Cyfronet AGH, Poland.
%
% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
% --------------
%
function [varargout] = extractGdfFields(gdf, fieldNames)
% Return the fields of gdf.d listed in the cell array fieldNames as separate
% output arguments, in the same order. An assertion fails when gdf.d does not
% have one of the requested fields.
    for outIdx = 1:length(fieldNames)
        requestedName = fieldNames{outIdx};
        fieldExists = isfield(gdf.d, requestedName);
        assert(fieldExists, ['gdf has no field with name: ' requestedName])
        varargout{outIdx} = getfield(gdf.d, requestedName);
    end
end

View File

@ -0,0 +1,46 @@
% -----------------
% Copyright © 2022 ACK Cyfronet AGH, Poland.
% -----------------
%
% Get string format for the time values provided in timeVector. Format is based
% on numeric 'FieldType' (GDF) or 'type' (catalog) field
% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
function timeStrVector = formatTime(timeVector, fieldType)
% Convert the values of timeVector to a column cell array of date strings.
% The date format is taken from getTimeFormat(fieldType); NaN entries are
% written as the text 'NaN'. A cell input is flattened with cell2mat first.
    % for the '5a' and '5b' types the date value is already a string (not a
    % number), so it is returned untouched
    isTextualDate = ischar(fieldType) && any(strcmp(fieldType, {'5a', '5b'}));
    if isTextualDate
        timeStrVector = timeVector;
        return;
    end
    if iscell(timeVector)
        timeVector = cell2mat(timeVector);
    end
    isMissing = isnan(timeVector);
    isPresent = ~isMissing;
    timeFormat = getTimeFormat(fieldType);
    timeStrVector(isPresent, 1) = cellstr(datestr(timeVector(isPresent), timeFormat));
    timeStrVector(isMissing, 1) = { 'NaN' };
end
function timeFormat = getTimeFormat(fieldType)
% Return the datestr format for a time field type: the fixed format
% 'yyyy mmm dd HH:MM:SS.FFF' for the numeric type 5, otherwise the characters
% following the first one, each translated by changeToMatlabFormat.
    if fieldType == 5
        timeFormat = 'yyyy mmm dd HH:MM:SS.FFF';
        return;
    end
    javaPattern = fieldType(2:end);
    timeFormat = arrayfun(@changeToMatlabFormat, javaPattern);
end
% Matlab uses different specification of the format than the one used in GDF files (based on Java format) - e.g., 'M' is
% used in GDF for month, while in Matlab it is used for minutes
function correctedFormatId = changeToMatlabFormat(javaTimeFormatId)
% Translate one format character: 'M' <-> 'm' are swapped, 's' becomes 'S'
% and 'S' becomes 'F'. Any other character is returned unchanged.
    javaIds = {'M', 'm', 's', 'S'};
    matlabIds = {'m', 'M', 'S', 'F'};
    isMatch = strcmp(javaTimeFormatId, javaIds);
    if any(isMatch)
        correctedFormatId = matlabIds{isMatch};
    else
        correctedFormatId = javaTimeFormatId;
    end
end

142
matlab/export/gdf2csv.m Normal file
View File

@ -0,0 +1,142 @@
% -----------------
% Copyright © 2020 ACK Cyfronet AGH, Poland.
%
% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
% -----------------
function csvFiles = gdf2csv(gdfFilePath)
% GDF2CSV Convert the data stored in a GDF file to one or more CSV files.
%   gdfFilePath - path of a file readable by load that contains the
%                 variables 'd' (data struct or struct array) and
%                 'FieldType' (cell array: field name in column 1, field
%                 type in column 2)
%   csvFiles    - cell array with the names of the created CSV files
%
%   When 'd' is a single struct, or a struct array without any vector
%   values, one file '<name>.csv' is written. Otherwise one file
%   '<name>-<i>.csv' is written per element of 'd'. <name> is the name of the
%   GDF file without directory and extension.
%   Raises 'gdf2csv:invalidGdf' when 'd' or 'FieldType' is missing.

    % load into a struct instead of the function workspace, so that variables
    % stored in the file cannot overwrite the locals used below
    gdf = load(gdfFilePath);
    if ~isstruct(gdf) || ~isfield(gdf, 'd') || ~isfield(gdf, 'FieldType')
        error('gdf2csv:invalidGdf', 'File %s does not contain the variables ''d'' and ''FieldType''', gdfFilePath);
    end
    d = gdf.d;
    [~, resultFileNameBase] = fileparts(gdfFilePath);
    fieldNames = fieldnames(d);
    fieldTypes = getFieldTypes(gdf.FieldType, fieldNames);
    if hasSingleData(d, fieldNames)
        if length(d) > 1
            M = prepareCellMatrixFromStructArrayWithScalars(d, fieldTypes);
        else
            M = prepareCellMatrixFromSingleStructWithVectors(d, fieldTypes, fieldNames);
        end
        resultFileName = [resultFileNameBase, '.csv'];
        saveCsvFile(M, fieldTypes, fieldNames, resultFileName);
        csvFiles = { resultFileName };
    else
        csvFiles = cell(1, length(d));
        for i = 1:length(d)
            resultFileName = [resultFileNameBase, '-', num2str(i), '.csv'];
            M = prepareCellMatrixFromSingleStructWithVectors(d(i), fieldTypes, fieldNames);
            saveCsvFile(M, fieldTypes, fieldNames, resultFileName);
            csvFiles{i} = resultFileName;
        end
    end
end
function isSingle = hasSingleData(d, fieldNames)
% True when d has exactly one element, or when hasAnyVectors reports no
% vector value in any element of d.
    if length(d) == 1
        isSingle = true;
        return;
    end
    isSingle = ~hasAnyVectors(d, fieldNames);
end
function hasVector = hasAnyVectors(d, fieldNames)
% True when findFirstVectorValue returns a non-empty value for at least one
% element of the struct array d; the scan stops at the first such element.
    hasVector = false;
    idx = 0;
    while ~hasVector && idx < length(d)
        idx = idx + 1;
        hasVector = ~isempty(findFirstVectorValue(d(idx), fieldNames));
    end
end
function vectorValue = findFirstVectorValue(d, fieldNames)
% Return the value of the first field of d (in fieldNames order) that holds a
% vector, i.e. a value that is neither text, nor a scalar, nor empty.
% Returns [] when d has no such field.
    vectorValue = [];
    for i = 1:length(fieldNames)
        value = d.(fieldNames{i});
        % an empty value is non-scalar as well, but it is a missing entry and
        % not a vector - it must not stop the search for a real vector
        if ~ischar(value) && ~isscalar(value) && ~isempty(value)
            vectorValue = value;
            return;
        end
    end
end
function saveCsvFile(M, fieldTypes, fieldNames, filename)
% Write the cell matrix M to the CSV file 'filename' (opened in 'w+' mode).
% The first line contains the comma-separated fieldNames, each following line
% one row of M printed with the format built by getLineFormat(fieldTypes).
% Raises 'gdf2csv:cannotOpenFile' when the file cannot be opened.
    lineFormat = getLineFormat(fieldTypes);
    [fid, errMsg] = fopen(filename, 'w+');
    if fid == -1
        error('gdf2csv:cannotOpenFile', 'Cannot open file %s for writing: %s', filename, errMsg);
    end
    % release the file handle also when one of the fprintf calls below fails
    closeFile = onCleanup(@() fclose(fid));
    fprintf(fid, '%s\n', strjoin(fieldNames', ','));
    for k = 1:size(M, 1)
        fprintf(fid, lineFormat, M{k, :});
    end
end
function M = prepareCellMatrixFromSingleStructWithVectors(d, fieldTypes, fieldNames)
% Build a cell matrix with one column per field of the single struct d.
% Time fields are converted to text by formatTime, numeric fields are split
% with num2cell, all other fields are copied (row vectors are transposed).
% The number of rows is taken from the length of the first field.

    % some structures might contain vectors mixed with scalars, in that case
    % we want to repeat the scalar values in the csv file
    d = convertScalarsToVectors(d, fieldNames);
    rowCount = length(d.(fieldNames{1}));
    M = cell(rowCount, length(fieldNames));
    for f = 1:length(fieldTypes)
        fieldType = fieldTypes{f};
        field = d.(fieldNames{f});
        if isTime(fieldType)
            column = formatTime(field, fieldType);
        else
            if isrow(field)
                field = field';
            end
            if isnumeric(field)
                column = num2cell(field);
            else
                column = field;
            end
        end
        M(:, f) = column;
    end
end
function M = prepareCellMatrixFromStructArrayWithScalars(d, fieldTypes)
% Build a cell matrix with one row per element of the struct array d and one
% column per field. Empty values are replaced by NaN (handleEmptyArray) and
% columns of a time type are converted to text by formatTime.

    % struct2cell puts the fields along the first dimension; reshape to
    % (fields x elements) explicitly - squeeze would turn the 1x1xN result of
    % a single-field struct array into a column and the transpose below would
    % then put all records into one row
    M = reshape(struct2cell(d), [], numel(d))';
    M = cellfun(@handleEmptyArray, M, 'UniformOutput', false);
    for f = 1:length(fieldTypes)
        fieldType = fieldTypes{f};
        if isTime(fieldType)
            M(:, f) = formatTime(M(:, f), fieldType);
        end
    end
end
function converted = convertScalarsToVectors(d, fieldNames)
% Return a copy of d in which every text, scalar or empty field is replaced
% by a column cell array that repeats the value (NaN for an empty field) as
% many times as the first vector field of d is long. Vector fields are left
% untouched.
%
% When d has no vector field at all, the values are repeated once, so that a
% single-record structure also ends up with one-row columns (a text value is
% kept as one cell instead of being treated as a vector of characters).
    converted = d;
    firstVectorValue = findFirstVectorValue(d, fieldNames);
    if isempty(firstVectorValue)
        count = 1;
    else
        count = length(firstVectorValue);
    end
    for i = 1:length(fieldNames)
        field = converted.(fieldNames{i});
        if isempty(field)
            field = nan;
        end
        if iscell(field) && isscalar(field)
            % already a cell - repeat it as it is instead of nesting it
            converted.(fieldNames{i}) = repmat(field, count, 1);
        elseif ischar(field) || isscalar(field)
            converted.(fieldNames{i}) = repmat({field}, count, 1);
        end
    end
end
function value = handleEmptyArray(value)
% Replace an empty value by NaN; any other value is returned unchanged.
    if ~isempty(value)
        return;
    end
    value = nan;
end
% creates a vector of field types written as in FieldType, but preserving the same order of fields as in the 'd'
% structure (fallback for a situation when the types in FieldType are in a different order than fieldnames(d) or when
% FieldType contains more entries than fieldnames(d))
function fieldTypes = getFieldTypes(fieldTypeCell, fieldNames)
% For every name in fieldNames return the type stored in the second column of
% fieldTypeCell in the row whose first column equals that name. When several
% rows match, the last one is used; without a match the entry stays empty.
    fieldTypes = cell(1, length(fieldNames));
    for nameIdx = 1:length(fieldNames)
        % scan from the bottom and stop at the first hit - this selects the
        % last matching row
        for row = size(fieldTypeCell, 1):-1:1
            if strcmp(fieldNames{nameIdx}, fieldTypeCell{row, 1})
                fieldTypes{nameIdx} = fieldTypeCell{row, 2};
                break;
            end
        end
    end
end

View File

@ -0,0 +1,54 @@
% -----------------
% Copyright © 2022 ACK Cyfronet AGH, Poland.
% -----------------
%
% Get string format specification used for fprintf, based on numeric 'FieldType' (GDF) or 'type' (catalog) field
% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
function lineFormat = getLineFormat(fieldTypes)
% Build the fprintf format of one CSV line: the format specifications of all
% field types (see getFormatSpec) joined with commas, followed by '\n'.
    fieldCount = length(fieldTypes);
    specs = cell(1, fieldCount);
    for f = 1:fieldCount
        specs{f} = getFormatSpec(fieldTypes{f});
    end
    lineFormat = [strjoin(specs, ',') '\n'];
end
function formatSpec = getFormatSpec(fieldType)
% Return the fprintf format specification for a single field type:
%   time types (isTime)  -> '"%s"'
%   numeric, below 10    -> getFormatSpecFromReservedFieldType
%   numeric, below 200   -> '%.Nf', N being the last digit of the type
%   numeric, below 300   -> '%.Ne', N being the last digit of the type
% Any other type raises an 'Unrecognized fieldType' error.
    if isTime(fieldType)
        formatSpec = '"%s"';
        return;
    end
    if ~isnumeric(fieldType)
        error(['Unrecognized fieldType: ', num2str(fieldType)])
    end
    if fieldType < 10
        formatSpec = getFormatSpecFromReservedFieldType(fieldType);
        return;
    end
    if fieldType < 200
        conversion = 'f';
    elseif fieldType < 300
        conversion = 'e';
    else
        error(['Unrecognized fieldType: ', num2str(fieldType)])
    end
    formatSpec = ['%.', num2str(mod(fieldType, 10)), conversion];
end
function formatSpec = getFormatSpecFromReservedFieldType(fieldType)
% Return the fprintf format specification of the reserved field types 1-7.
% Any other value raises an 'Unrecognized fieldType' error.

    % the position in the table is the reserved field type
    reservedSpecs = {'%f', '%d', '"%s"', '%.1f', '%s', '%1.1e', '%1.2e'};
    matchIdx = [];
    if ~ischar(fieldType)
        matchIdx = find(fieldType == 1:numel(reservedSpecs), 1);
    end
    if isempty(matchIdx)
        error(['Unrecognized fieldType: ', num2str(fieldType)])
    end
    formatSpec = reservedSpecs{matchIdx};
end

10
matlab/export/isTime.m Normal file
View File

@ -0,0 +1,10 @@
% -----------------
% Copyright © 2022 ACK Cyfronet AGH, Poland.
% -----------------
%
% Check if the format defined in 'fieldType' marks time or other type of field.
% Based on numeric 'FieldType' (GDF) or 'type' (catalog) field
% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
function result = isTime(fieldType)
% ISTIME True when fieldType marks a time field.
%   A time type is either the number 5 or text starting with the character
%   '5' (e.g. '5a'). Always returns a logical scalar.
%
%   Text and numbers are checked separately: comparing text with the number 5
%   gives one result per character, which is not a valid operand of ||, and
%   comparing a number with the character '5' compares it with the character
%   code 53, which would classify the numeric type 53 as time.
    if ischar(fieldType)
        result = ~isempty(fieldType) && fieldType(1) == '5';
    else
        result = isnumeric(fieldType) && isscalar(fieldType) && fieldType == 5;
    end
end