Add code snippets
This commit is contained in:
110
matlab/csvconverter/csv2catalog.m
Normal file
110
matlab/csvconverter/csv2catalog.m
Normal file
@@ -0,0 +1,110 @@
|
||||
function [Catalog] = csv2catalog(csvFilePath, column_desc, idPrefix, sortCatalogByTime, useCsvHeader)
% DESCRIPTION: Program to create the catalogue v2.0x in the
%   Matlab format file from csv file.
% INPUTS:
%   - csvFilePath : path to text file with csv data. The separator between columns is defined by ','.
%       Description of the most popular catalog fields can be found at https://docs.cyfronet.pl/display/ISDOC/Catalog+-+description
%   - column_desc : structure array (one element per CSV column) describing how the catalog should be created
%       - include: whether the field should be included in the resulting catalog
%       - nameInCsv: name of the column in CSV file
%       - inputType: type of the column in CSV file (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME)
%           NOTE(review): parseTextValue only accepts REAL, INTEGER, TEXT and DATE_TIME - confirm how the
%           remaining types are expected to reach this function.
%       - inputTimeFormat: input format for reading time (if 'inputType' is one of time options)
%       - inputTimeGroup: input group for merging dates; columns sharing a group are joined with '-'
%           (both their raw values and their inputTimeFormat) and parsed as a single value
%       - nameInCatalog: name of the column that should be inserted into resulting catalog
%       - description: description of the column in the resulting catalog
%       - format: format (display format) of the column in the resulting catalog
%       - unit: unit of the column in the resulting catalog
%       - fieldType: type of the column in the resulting catalog (e.g. Magnitude, Energy)
%   - idPrefix : prefix of the ID column if the IDs should be generated (if the catalog doesn't contain
%       the id or the column is not included ('include' is set to false)). When empty, the name of the
%       CSV file (without directory and extension) is used instead.
%   - sortCatalogByTime : when true, the assembled catalog is passed through sortByTime before returning
%   - useCsvHeader : when true, the first row of the CSV is treated as a header row (it is validated by
%       readAndCheckHeaders and skipped); when false, the first row is already data
% OUTPUT:
%   - Catalog : structure array with one element per catalog column and the fields
%       field, type, val, unit, description, fieldType

    %TODO handle multiple time columns
    %TODO the script fails if any of the rows has an empty value at the end (but empty quotes is ok)

    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);
    % 'data' is a flat list of all CSV cells; the modifier shifts row indexing
    % depending on whether the first colCount cells are headers (0) or data (1)
    noCsvHeaderModifier = 1;
    if useCsvHeader
        noCsvHeaderModifier = 0;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;
    k = 1; % column number in the generated catalog

    if ~contains_id(column_desc)
        % no usable ID column in the CSV - generate sequential IDs
        if isempty(idPrefix)
            [~, idPrefix] = fileparts(csvFilePath);
        end
        Catalog(k).field = 'ID';
        Catalog(k).type = 3;
        % make sure 'val' exists even when the CSV holds no data rows
        Catalog(k).val = cell(0, 1);
        for row = 1 : rowCount
            Catalog(k).val(row, 1) = { strcat(idPrefix, '_', num2str(row,'%04.f')) };
        end
        Catalog(k).unit = [];
        Catalog(k).description = 'Event ID';
        Catalog(k).fieldType = [];
        k = k + 1;
    end

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);

    for col = 1:colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Resolve the time group once per column (it does not depend on the row).
        % timeGroupArray stays empty for columns outside any group, which makes
        % the merge loops below no-ops.
        inputTimeGroup = current_col.inputTimeGroup;
        timeGroupArray = [];
        if ~isempty(inputTimeGroup)
            timeGroupArray = columnsInTimeGroup(inputTimeGroup);
            for columnInGroup = 2 : length(timeGroupArray)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' column_desc(timeGroupArray(columnInGroup)).inputTimeFormat];
            end
        end

        Catalog(k).field = current_col.nameInCatalog;
        Catalog(k).type = current_col.format;
        % make sure 'val' exists (with the proper container type) even when
        % the CSV holds no data rows
        if isText(current_col)
            Catalog(k).val = cell(0, 1);
        else
            Catalog(k).val = zeros(0, 1);
        end
        for row = 1 : rowCount
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            % append the values of the remaining columns of the time group
            for columnInGroup = 2 : length(timeGroupArray)
                rawValue = [rawValue '-' data{rowOffset + timeGroupArray(columnInGroup), 1}];
            end
            if isempty(rawValue)
                if strcmp(current_col.nameInCatalog, 'ID')
                    error('ID of the event cannot be empty (row: %d)', row)
                elseif isText(current_col)
                    Catalog(k).val(row, 1) = {''};
                else
                    Catalog(k).val(row, 1) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                if strcmp(current_col.format, '5a')
                    % format '5a' is stored as a year string
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    % format '5b' is stored as a year-month string
                    Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    Catalog(k).val(row, 1) = parsedValue;
                end
            end
        end
        Catalog(k).unit = current_col.unit;
        Catalog(k).description = current_col.description;
        Catalog(k).fieldType = current_col.fieldType;
        k = k + 1;
    end
    if sortCatalogByTime
        Catalog = sortByTime(Catalog);
    end
end
|
||||
|
||||
function containsId = contains_id(column_desc)
% DESCRIPTION: Checks whether the column description defines an 'ID' catalog column
%   that is marked to be included in the resulting catalog.
% INPUTS:
%   - column_desc : structure array with (at least) the fields nameInCatalog and include
% OUTPUT:
%   - containsId : true when any column named 'ID' (in nameInCatalog) has 'include' set,
%       false otherwise

    % look through ALL columns, not only the first one - the ID column may be
    % placed anywhere in the CSV
    idIdxs = find(strcmp({column_desc.nameInCatalog}, 'ID'));
    if isempty(idIdxs)
        containsId = false;
    else
        containsId = any([column_desc(idIdxs).include]);
    end
end
|
88
matlab/csvconverter/csv2gdf.m
Normal file
88
matlab/csvconverter/csv2gdf.m
Normal file
@@ -0,0 +1,88 @@
|
||||
% -----------------
|
||||
% Copyright © 2023 ACK Cyfronet AGH, Poland.
|
||||
% -----------------
|
||||
|
||||
function [gdfFileName] = csv2gdf(csvFilePath, column_desc, description, useCsvHeader)
% DESCRIPTION: Program to create GDF files (in Matlab format) from csv file. Performs a reverse action to the
%   gdf2csv.m script.
% INPUTS:
%   - csvFilePath : path to text file with csv data. The separator between columns is defined by ','.
%       Description of the most popular GDF file formats can be found at https://docs.cyfronet.pl/display/ISDOC/GDF
%   - column_desc : structure array (one element per CSV column) describing how the gdf should be constructed;
%       the fields read here are include, inputType, inputTimeFormat, inputTimeGroup, nameInCatalog,
%       description, format and unit
%   - description : description written into the file
%   - useCsvHeader : when true, the first row of the CSV is treated as a header row (it is validated by
%       readAndCheckHeaders and skipped); when false, the first row is already data
% OUTPUT:
%   - gdfFileName : name of the CSV file without directory and extension; the GDF content is saved
%       to '<gdfFileName>.mat' (relative to the current directory)

    data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader);
    colCount = length(column_desc);
    % 'data' is a flat list of all CSV cells; the modifier shifts row indexing
    % depending on whether the first colCount cells are headers (0) or data (1)
    noCsvHeaderModifier = 1;
    if useCsvHeader
        noCsvHeaderModifier = 0;
    end
    rowCount = length(data) / colCount - 1 + noCsvHeaderModifier;

    % The variable names below are the names stored in the .mat file - do not rename
    FormatName = 'GDF';
    FormatVersion = 2.1;
    CRS = 'n/a';
    TimeZone = 'UTC';
    Description = description;

    FieldDescription = {};
    FieldType = {};
    FieldUnit = {};
    d = struct();

    [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc);
    colInGdf = 1; % column number in the generated gdf
    for col = 1:colCount
        current_col = column_desc(col);
        if ~current_col.include || ismember(col, columnsNotToInclude)
            continue;
        end

        % Resolve the time group once per column (it does not depend on the row).
        % timeGroupArray stays empty for columns outside any group, which makes
        % the merge loops below no-ops.
        inputTimeGroup = current_col.inputTimeGroup;
        timeGroupArray = [];
        if ~isempty(inputTimeGroup)
            timeGroupArray = columnsInTimeGroup(inputTimeGroup);
            for columnInGroup = 2 : length(timeGroupArray)
                current_col.inputTimeFormat = [current_col.inputTimeFormat '-' column_desc(timeGroupArray(columnInGroup)).inputTimeFormat];
            end
        end

        fieldName = current_col.nameInCatalog;
        FieldDescription(colInGdf, 1) = fieldName;
        FieldDescription(colInGdf, 2) = current_col.description;
        FieldType(colInGdf, 1) = fieldName;
        FieldType(colInGdf, 2) = current_col.format;
        FieldUnit(colInGdf, 1) = fieldName;
        FieldUnit(colInGdf, 2) = current_col.unit;
        d.(fieldName) = [];
        for row = 1 : rowCount
            rowOffset = colCount * (row - noCsvHeaderModifier);
            rawValue = data{rowOffset + col, 1};
            % append the values of the remaining columns of the time group
            for columnInGroup = 2 : length(timeGroupArray)
                rawValue = [rawValue '-' data{rowOffset + timeGroupArray(columnInGroup), 1}];
            end
            if isempty(rawValue)
                if isText(current_col)
                    d.(fieldName)(row) = {''};
                else
                    d.(fieldName)(row) = NaN;
                end
            else
                parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat);
                if strcmp(current_col.format, '5a')
                    % format '5a' is stored as a year string
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy') };
                elseif strcmp(current_col.format, '5b')
                    % format '5b' is stored as a year-month string
                    d.(fieldName)(row) = { datestr(parsedValue, 'yyyy-mm') };
                else
                    d.(fieldName)(row) = parsedValue;
                end
            end
        end
        colInGdf = colInGdf + 1;
    end

    [~, gdfFileName, ~] = fileparts(csvFilePath);
    save(strcat(gdfFileName, '.mat'), 'FormatName', 'FormatVersion', 'CRS', 'TimeZone', 'Description', ...
        'FieldDescription', 'FieldType', 'FieldUnit', 'd', '-v7')
end
|
23
matlab/csvconverter/getTimeGroups.m
Normal file
23
matlab/csvconverter/getTimeGroups.m
Normal file
@@ -0,0 +1,23 @@
|
||||
% -----------------
|
||||
% Copyright © 2023 ACK Cyfronet AGH, Poland.
|
||||
% -----------------
|
||||
function [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc)
% DESCRIPTION: Walks through column_desc and collects, for every non-empty inputTimeGroup,
%   the indexes of the columns that declare it (in order of appearance).
% INPUTS:
%   - column_desc : structure containing definition of the CSV columns and their mapping to the final object
% OUTPUTS:
%   - columnsInTimeGroup : containers.Map from inputTimeGroup to the vertically stacked indexes
%       of the columns belonging to that group
%   - columnsNotToInclude : vertically stacked indexes of all columns that are NOT the first
%       member of their time group

    columnsNotToInclude = [];
    columnsInTimeGroup = containers.Map();

    for colIdx = 1:length(column_desc)
        groupKey = column_desc(colIdx).inputTimeGroup;
        if isempty(groupKey)
            % column does not take part in any time group
            continue;
        end

        groupAlreadyKnown = ismember(groupKey, columnsInTimeGroup.keys);
        if groupAlreadyKnown
            % subsequent member of the group: remember it in the group and mark it as merged
            columnsInTimeGroup(groupKey) = [columnsInTimeGroup(groupKey); colIdx];
            columnsNotToInclude = [columnsNotToInclude; colIdx];
        else
            % first member opens the group
            columnsInTimeGroup(groupKey) = colIdx;
        end
    end
end
|
10
matlab/csvconverter/isText.m
Normal file
10
matlab/csvconverter/isText.m
Normal file
@@ -0,0 +1,10 @@
|
||||
% -----------------
|
||||
% Copyright © 2023 ACK Cyfronet AGH, Poland.
|
||||
% -----------------
|
||||
|
||||
function result = isText(col_desc)
% DESCRIPTION: Tells whether the values of the given column are kept as text.
%   A column counts as text when its inputType is 'TEXT' or when its format is
%   '5a' or '5b' (formats that csv2catalog/csv2gdf store as date strings).
% INPUTS:
%   - col_desc : structure containing information how the column should be constructed
%       (fields used: inputType, format)
% OUTPUT:
%   - result : logical, true for text columns

    hasTextInput = strcmp(col_desc.inputType, 'TEXT');
    hasYearFormat = strcmp(col_desc.format, '5a');
    hasYearMonthFormat = strcmp(col_desc.format, '5b');
    result = hasTextInput | hasYearFormat | hasYearMonthFormat;
end
|
35
matlab/csvconverter/parseTextValue.m
Normal file
35
matlab/csvconverter/parseTextValue.m
Normal file
@@ -0,0 +1,35 @@
|
||||
% -----------------
|
||||
% Copyright © 2023 ACK Cyfronet AGH, Poland.
|
||||
% -----------------
|
||||
|
||||
function parsedValue = parseTextValue(rawValue, type, timeFormat)
% DESCRIPTION: Program that parses and returns value read from the text (a cell from a CSV file).
% INPUTS:
%   - rawValue : value to parse (non-empty text; callers handle empty values before parsing)
%   - type : type of the value as defined by CsvColumnContentType.java; the types handled here are
%       'REAL', 'INTEGER', 'TEXT' and 'DATE_TIME'
%   - timeFormat : if the rawValue contains time, this format is used to parse it
% OUTPUT:
%   - parsedValue : a scalar number for REAL/INTEGER, a 1x1 cell holding the text for TEXT,
%       a serial date number (as returned by datenum) for DATE_TIME
% ERRORS:
%   - raised when a number or a date cannot be parsed, or when 'type' is not one of the handled types

    switch type
        case {'REAL', 'INTEGER'}
            % NOTE(review): str2num evaluates its argument as an expression, so CSV content from
            % an untrusted source could execute arbitrary code here. Consider switching to
            % str2double once it is confirmed that no input relies on expression syntax.
            try
                parsedValue = str2num(rawValue);
            catch
                error('Cannot parse number input (type: %s): %s', type, rawValue);
            end
            if ~isscalar(parsedValue)
                % Empty result: we checked if the value is empty before parsing (and such value will
                % not be parsed), so an empty value here means the text was in a wrong format.
                % Multi-element result (e.g. '1 2'): a CSV cell has to hold exactly one number;
                % fail here with a clear message instead of a size mismatch in the caller.
                error('Cannot parse number input (type: %s): %s', type, rawValue);
            end
        case 'TEXT'
            parsedValue = {rawValue};
        case 'DATE_TIME'
            try
                parsedValue = datenum(rawValue, timeFormat);
            catch
                error('Invalid input time format specification or CSV content to parse (%s)', rawValue);
            end
        otherwise
            error('Unexpected input column type %s', type);
    end
end
|
33
matlab/csvconverter/readAndCheckHeaders.m
Normal file
33
matlab/csvconverter/readAndCheckHeaders.m
Normal file
@@ -0,0 +1,33 @@
|
||||
% -----------------
|
||||
% Copyright © 2023 ACK Cyfronet AGH, Poland.
|
||||
% -----------------
|
||||
|
||||
function data = readAndCheckHeaders(csvFilePath, column_desc, doCheckHeaders)
% DESCRIPTION: Program that reads content from the CSV file, optionally checking if the content matches
%   the headers defined in the column_desc structure. The returned value is a cell with all values
%   from the csv file.
% INPUTS:
%   - csvFilePath : path to the CSV file
%   - column_desc : structure containing definition of the CSV columns and their mapping to the final object
%   - doCheckHeaders : when true, the first values of the file are validated against
%       column_desc(i).nameInCsv (see check_headers); when false, no validation is done
% OUTPUT:
%   - data : cell with all values from the csv file, in reading order (row after row)
% ERRORS:
%   - raised when the file cannot be opened or when the header validation fails

    fid = fopen(csvFilePath);
    if fid < 0
        % fail early with a readable message instead of passing an invalid handle to textscan
        error('Cannot open CSV file: %s', csvFilePath);
    end
    try
        parsed = textscan(fid, '%q', 'Delimiter', ',');
    catch err
        % do not leak the file handle when reading fails
        fclose(fid);
        rethrow(err);
    end
    fclose(fid);
    data = parsed{1}; % cell with all values from the csv file

    if doCheckHeaders
        check_headers(data, column_desc);
    end
end
|
||||
|
||||
function check_headers(data, column_desc)
% DESCRIPTION: Validates the values read from a CSV file against the column description.
%   The first length(column_desc) values are treated as the header row and have to be equal
%   (in the same order) to column_desc(i).nameInCsv. Additionally the total number of values
%   has to be a multiple of the number of columns.
% INPUTS:
%   - data : cell with all values from the csv file, in reading order
%   - column_desc : structure array with the field nameInCsv
% ERRORS:
%   - raised when the file has fewer values than expected columns, when a header does not match,
%     or when the number of values is not a multiple of the number of columns

    colCount = length(column_desc);
    if length(data) < colCount
        % report a short/empty file explicitly instead of failing on out-of-range indexing
        error('Expected %d columns in CSV headers, but found only %d values in the file', colCount, length(data));
    end

    for i = 1:colCount
        expectedName = column_desc(i).nameInCsv;
        foundName = data{i};
        if ~strcmp(expectedName, foundName)
            error('Expected column %s, but found %s in CSV headers', expectedName, char(foundName));
        end
    end

    if mod(length(data), colCount) ~= 0
        error('Improper number of values in one of the rows');
    end
end
|
Reference in New Issue
Block a user