diff --git a/matlab/export/catalog2csv.m b/matlab/export/catalog2csv.m
new file mode 100644
index 0000000..24a1436
--- /dev/null
+++ b/matlab/export/catalog2csv.m
@@ -0,0 +1,67 @@
+% -----------------
+% Copyright © 2022 ACK Cyfronet AGH, Poland.
+% -----------------
+%
+% Write a catalog to a CSV file: a header line with the field names followed by one line per row of values.
+%   catalog     - struct array; the 'field', 'type' and 'val' members of every element are read
+%   csvFileName - name of the file to create; it is opened with 'w+', so existing content is discarded
+function catalog2csv(catalog, csvFileName)
+
+    fieldNames = getFieldNames(catalog);
+    fieldTypes = getFieldTypes(catalog);
+    lineFormat = getLineFormat(fieldTypes);
+    M = prepareCellMatrix(catalog);
+
+    fid = fopen(csvFileName, 'w+');
+    if fid == -1
+        error('catalog2csv:cannotOpenFile', 'Cannot open file for writing: %s', csvFileName);
+    end
+    % release the file handle even when one of the fprintf calls below fails
+    closeFile = onCleanup(@() fclose(fid));
+
+    fprintf(fid, '%s\n', strjoin(fieldNames, ','));
+    for k = 1:size(M, 1)
+        fprintf(fid, lineFormat, M{k, :});
+    end
+
+end
+
+% Collect the 'field' member of every catalog element in a 1-by-N cell array
+function fieldNames = getFieldNames(catalog)
+    fieldNames = {catalog.field};
+end
+
+% Collect the 'type' member of every catalog element in a 1-by-N cell array
+function fieldTypes = getFieldTypes(catalog)
+    fieldTypes = {catalog.type};
+end
+
+% Build a cell matrix with one column per catalog element; the rows are the entries of its 'val' member
+function M = prepareCellMatrix(catalog)
+    M = {};
+    for i = 1:length(catalog)
+        val = catalog(i).val;
+        type = catalog(i).type;
+        if iscell(val)
+            M(:, i) = val;
+        elseif isTime(type)
+            M(:, i) = formatCatalogTime(val, type);
+        else
+            M(:, i) = num2cell(val);
+        end
+    end
+end
+
+% Format numeric date values as 'yyyy-mm-dd HH:MM:SS.FFF' strings; NaN entries become the text 'NaN'.
+% fieldType is not used here: the output format is fixed.
+function timeStrVector = formatCatalogTime(timeVector, fieldType)
+    if iscell(timeVector)
+        timeVector = cell2mat(timeVector);
+    end
+    emptyIndexes = isnan(timeVector);
+    timeStrVector = cell(numel(timeVector), 1);
+    if any(~emptyIndexes(:))
+        timeStrVector(~emptyIndexes, 1) = cellstr(datestr(timeVector(~emptyIndexes), 'yyyy-mm-dd HH:MM:SS.FFF'));
+    end
+    timeStrVector(emptyIndexes, 1) = { 'NaN' };
+end
\ No newline at end of file
diff --git a/matlab/export/extractGdfFields.m b/matlab/export/extractGdfFields.m
new file mode 100644
index 0000000..a1c8693
--- /dev/null
+++ b/matlab/export/extractGdfFields.m
@@ -0,0 +1,16 @@
+%
+% -----------------
+% Copyright © 2020 ACK Cyfronet AGH, Poland.
+%
+% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
+% --------------
+%
+% Return the value of every field of gdf.d named in the cell array fieldNames, one output per name
+function [varargout] = extractGdfFields(gdf, fieldNames)
+    varargout = cell(1, length(fieldNames));
+    for k = 1:length(fieldNames)
+        name = fieldNames{k};
+        assert(isfield(gdf.d, name), ['gdf has no field with name: ' name])
+        varargout{k} = getfield(gdf.d, name);
+    end
+end
\ No newline at end of file
diff --git a/matlab/export/formatTime.m b/matlab/export/formatTime.m
new file mode 100644
index 0000000..61916ec
--- /dev/null
+++ b/matlab/export/formatTime.m
@@ -0,0 +1,52 @@
+% -----------------
+% Copyright © 2022 ACK Cyfronet AGH, Poland.
+% -----------------
+%
+% Get string format for the time values provided in timeVector. Format is based
+% on numeric 'FieldType' (GDF) or 'type' (catalog) field
+% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
+function timeStrVector = formatTime(timeVector, fieldType)
+    % for the '5a' and '5b' types the date value is a string instead of a number, so it is returned as it is
+    if ischar(fieldType) && any(strcmp(fieldType, {'5a', '5b'}))
+        timeStrVector = timeVector;
+        return;
+    end
+    if iscell(timeVector)
+        timeVector = cell2mat(timeVector);
+    end
+    emptyIndexes = isnan(timeVector);
+    timeStrVector = cell(numel(timeVector), 1);
+    % there is nothing to pass to datestr when every value is NaN
+    if any(~emptyIndexes(:))
+        timeFormat = getTimeFormat(fieldType);
+        timeStrVector(~emptyIndexes, 1) = cellstr(datestr(timeVector(~emptyIndexes), timeFormat));
+    end
+    timeStrVector(emptyIndexes, 1) = { 'NaN' };
+end
+
+% The numeric type 5 uses a fixed format; for any other type the characters after the first one are translated, one by
+% one, from the GDF notation to the notation understood by datestr
+function timeFormat = getTimeFormat(fieldType)
+    if isnumeric(fieldType) && isscalar(fieldType) && fieldType == 5
+        timeFormat = 'yyyy mmm dd HH:MM:SS.FFF';
+    else
+        timeFormat = arrayfun(@changeToMatlabFormat, fieldType(2:end));
+    end
+end
+
+% Matlab uses different specification of the format than the one used in GDF files (based on Java format) - e.g., 'M' is
+% used in GDF for month, while in Matlab it is used for minutes
+function correctedFormatId = changeToMatlabFormat(javaTimeFormatId)
+    switch javaTimeFormatId
+        case 'M'
+            correctedFormatId = 'm';
+        case 'm'
+            correctedFormatId = 'M';
+        case 's'
+            correctedFormatId = 'S';
+        case 'S'
+            correctedFormatId = 'F';
+        otherwise
+            correctedFormatId = javaTimeFormatId;
+    end
+end
diff --git a/matlab/export/gdf2csv.m b/matlab/export/gdf2csv.m
new file mode 100644
index 0000000..ebd4656
--- /dev/null
+++ b/matlab/export/gdf2csv.m
@@ -0,0 +1,153 @@
+% -----------------
+% Copyright © 2020 ACK Cyfronet AGH, Poland.
+%
+% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2
+% -----------------
+%
+% Convert the GDF file gdfFilePath to CSV. The 'd' and 'FieldType' variables are read from the file; the CSV files take
+% the base name of the input file (no directory part). Returns a cell array with the names of the created files.
+function csvFiles = gdf2csv(gdfFilePath)
+
+    % load into a structure, so that variables stored in the file cannot overwrite the variables of this function
+    gdf = load(gdfFilePath, 'd', 'FieldType');
+    d = gdf.d;
+    [~, resultFileNameBase] = fileparts(gdfFilePath);
+
+    fieldNames = fieldnames(d);
+    fieldTypes = getFieldTypes(gdf.FieldType, fieldNames);
+
+    if hasSingleData(d, fieldNames)
+        if length(d) > 1
+            M = prepareCellMatrixFromStructArrayWithScalars(d, fieldTypes);
+        else
+            M = prepareCellMatrixFromSingleStructWithVectors(d, fieldTypes, fieldNames);
+        end
+        resultFileName = [resultFileNameBase, '.csv'];
+        saveCsvFile(M, fieldTypes, fieldNames, resultFileName);
+        csvFiles = { resultFileName };
+    else
+        % one numbered file per element of the structure array
+        csvFiles = cell(1, length(d));
+        for i = 1:length(d)
+            resultFileName = [resultFileNameBase, '-', num2str(i), '.csv'];
+            M = prepareCellMatrixFromSingleStructWithVectors(d(i), fieldTypes, fieldNames);
+            saveCsvFile(M, fieldTypes, fieldNames, resultFileName);
+            csvFiles{i} = resultFileName;
+        end
+    end
+end
+
+function isSingle = hasSingleData(d, fieldNames)
+    isSingle = length(d) == 1 || ~hasAnyVectors(d, fieldNames);
+end
+
+function hasVector = hasAnyVectors(d, fieldNames)
+    hasVector = false;
+    for i = 1:length(d)
+        if ~isempty(findFirstVectorValue(d(i), fieldNames))
+            hasVector = true;
+            return;
+        end
+    end
+end
+
+% Return the first field value that is neither text nor a scalar, or [] when there is none. Empty values are skipped:
+% returning one of them would be read by the callers as 'no vector found' and hide the vectors of the following fields
+function vectorValue = findFirstVectorValue(d, fieldNames)
+    vectorValue = [];
+    for i = 1:length(fieldNames)
+        value = d.(fieldNames{i});
+        if ~ischar(value) && ~isscalar(value) && ~isempty(value)
+            vectorValue = value;
+            return;
+        end
+    end
+end
+
+function saveCsvFile(M, fieldTypes, fieldNames, filename)
+    lineFormat = getLineFormat(fieldTypes);
+    fid = fopen(filename, 'w+');
+    if fid == -1
+        error('gdf2csv:cannotOpenFile', 'Cannot open file for writing: %s', filename);
+    end
+    % release the file handle even when one of the fprintf calls below fails
+    closeFile = onCleanup(@() fclose(fid));
+    fprintf(fid, '%s\n', strjoin(fieldNames', ','));
+    for k = 1:size(M, 1)
+        fprintf(fid, lineFormat, M{k, :});
+    end
+end
+
+function M = prepareCellMatrixFromSingleStructWithVectors(d, fieldTypes, fieldNames)
+    % some structures might contain vectors mixed with scalars, in that case we want to repeat the scalar values in the
+    % csv file
+    d = convertScalarsToVectors(d, fieldNames);
+    M = cell(length(d.(fieldNames{1})), length(fieldNames));
+    for f = 1:length(fieldTypes)
+        fieldType = fieldTypes{f};
+        field = d.(fieldNames{f});
+        if isTime(fieldType)
+            M(:, f) = formatTime(field, fieldType);
+        else
+            if isrow(field)
+                field = field';
+            end
+            if isnumeric(field)
+                field = num2cell(field);
+            end
+            M(:, f) = field;
+        end
+    end
+end
+
+function M = prepareCellMatrixFromStructArrayWithScalars(d, fieldTypes)
+    M = squeeze(struct2cell(d))';
+    M = cellfun(@handleEmptyArray, M, 'UniformOutput', false);
+    for f = 1:length(fieldTypes)
+        fieldType = fieldTypes{f};
+        if isTime(fieldType)
+            M(:, f) = formatTime(M(:, f), fieldType);
+        end
+    end
+end
+
+% Replace every scalar, text or empty field by a column cell array repeating its value (NaN for an empty field), with
+% as many entries as the first vector field has. The structure is returned unchanged when it contains no vector
+function converted = convertScalarsToVectors(d, fieldNames)
+    converted = d;
+    firstVectorValue = findFirstVectorValue(d, fieldNames);
+    if isempty(firstVectorValue)
+        return;
+    end
+    count = length(firstVectorValue);
+    for i = 1:length(fieldNames)
+        field = converted.(fieldNames{i});
+        if isempty(field)
+            field = nan;
+        end
+        if ischar(field) || isscalar(field)
+            converted.(fieldNames{i}) = repmat({ field }, count, 1);
+        end
+    end
+end
+
+function value = handleEmptyArray(value)
+    if isempty(value)
+        value = nan;
+    end
+end
+
+% creates a vector of field types written as in FieldType, but preserving the same order of fields as in the 'd'
+% structure (fallback for a situation when the types in FieldType are in different order than fieldnames(d) or when
+% FieldType contains more entries than
fieldnames(d))
+function fieldTypes = getFieldTypes(fieldTypeCell, fieldNames)
+    fieldTypes = cell(1, length(fieldNames));
+    for i = 1:length(fieldNames)
+        for j = 1:size(fieldTypeCell, 1)
+            if strcmp(fieldNames{i}, fieldTypeCell{j, 1})
+                fieldTypes{i} = fieldTypeCell{j, 2};
+            end
+        end
+        assert(~isempty(fieldTypes{i}), 'gdf2csv:missingFieldType', 'No FieldType for field: %s', fieldNames{i});
+    end
+end
diff --git a/matlab/export/getLineFormat.m b/matlab/export/getLineFormat.m
new file mode 100644
index 0000000..5f4be31
--- /dev/null
+++ b/matlab/export/getLineFormat.m
@@ -0,0 +1,57 @@
+% -----------------
+% Copyright © 2022 ACK Cyfronet AGH, Poland.
+% -----------------
+%
+% Get string format specification used for fprintf, based on numeric 'FieldType' (GDF) or 'type' (catalog) field
+% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
+function lineFormat = getLineFormat(fieldTypes)
+    formatSpecs = cell(1, length(fieldTypes));
+    for f = 1:length(fieldTypes)
+        formatSpecs{f} = getFormatSpec(fieldTypes{f});
+    end
+    % the trailing '\n' stays escaped here: it is translated by fprintf when the format is used
+    lineFormat = [strjoin(formatSpecs, ',') '\n'];
+end
+
+% Map a single field type to the fprintf conversion used for its column
+function formatSpec = getFormatSpec(fieldType)
+    if isTime(fieldType)
+        formatSpec = '"%s"';
+    elseif isnumeric(fieldType) && isscalar(fieldType)
+        if fieldType < 10
+            formatSpec = getFormatSpecFromReservedFieldType(fieldType);
+        elseif fieldType < 200
+            % the last digit of the type gives the number of digits after the decimal point
+            formatSpec = ['%.', num2str(mod(fieldType, 10)), 'f'];
+        elseif fieldType < 300
+            formatSpec = ['%.', num2str(mod(fieldType, 10)), 'e'];
+        else
+            error('getLineFormat:unrecognizedFieldType', 'Unrecognized fieldType: %s', num2str(fieldType))
+        end
+    else
+        error('getLineFormat:unrecognizedFieldType', 'Unrecognized fieldType: %s', num2str(fieldType))
+    end
+end
+
+% Conversions of the field types below 10
+function formatSpec = getFormatSpecFromReservedFieldType(fieldType)
+    switch fieldType
+        case 1
+            formatSpec = '%f';
+        case 2
+            formatSpec = '%d';
+        case 3
+            formatSpec = '"%s"';
+        case 4
+            formatSpec = '%.1f';
+        case 5
+            % not reached through getFormatSpec, which handles the time types before calling this function
+            formatSpec = '%s';
+        case 6
+            formatSpec = '%1.1e';
+        case 7
+            formatSpec = '%1.2e';
+        otherwise
+            error('getLineFormat:unrecognizedFieldType', 'Unrecognized fieldType: %s', num2str(fieldType))
+    end
+end
diff --git a/matlab/export/isTime.m b/matlab/export/isTime.m new
file mode 100644
index 0000000..749f980
--- /dev/null
+++ b/matlab/export/isTime.m
@@ -0,0 +1,16 @@
+% -----------------
+% Copyright © 2022 ACK Cyfronet AGH, Poland.
+% -----------------
+%
+% Check if the format defined in 'fieldType' marks time or other type of field.
+% Based on numeric 'FieldType' (GDF) or 'type' (catalog) field
+% see https://docs.cyfronet.pl/display/ISEPOS/GDF+v2.2+-+description
+% A time type is either the number 5 or a text starting with the character '5'; anything else gives false.
+function isTime = isTime(fieldType)
+    if ischar(fieldType)
+        isTime = ~isempty(fieldType) && fieldType(1) == '5';
+    else
+        % a number must not be compared with '5': the code of this character is 53, so the numeric type 53 would match
+        isTime = isnumeric(fieldType) && isscalar(fieldType) && fieldType == 5;
+    end
+end