From 064a0f75cbc215a98068b9276eefd7039e2f4f9d Mon Sep 17 00:00:00 2001 From: Mieszko Makuch Date: Wed, 28 Aug 2024 17:22:21 +0200 Subject: [PATCH] Add code snippets --- matlab/catalog/CatalogFilterNaN.m | 40 ++++ matlab/catalog/CatalogSelectFields.m | 55 +++++ matlab/catalog/CatalogSelectRange.m | 83 ++++++++ matlab/catalog/eliminateEmptyValues.m | 24 +++ matlab/catalog/extractColumns.m | 25 +++ matlab/catalog/findCatalogColumn.m | 19 ++ matlab/catalog/merge2Catalog.m | 100 +++++++++ matlab/catalog/modifyCatalog.m | 90 ++++++++ matlab/catalog/sortByTime.m | 10 + matlab/catalog/test | 1 + matlab/csvconverter/csv2catalog.m | 110 ++++++++++ matlab/csvconverter/csv2gdf.m | 88 ++++++++ matlab/csvconverter/getTimeGroups.m | 23 ++ matlab/csvconverter/isText.m | 10 + matlab/csvconverter/parseTextValue.m | 35 +++ matlab/csvconverter/readAndCheckHeaders.m | 33 +++ matlab/m2m/LinReg.m | 186 ++++++++++++++++ matlab/m2m/LinRegWrapper.m | 32 +++ matlab/m2m/LinReg_O.m | 186 ++++++++++++++++ matlab/multipletidentification/readsac.m | 287 +++++++++++++++++++++++++ matlab/multipletidentification/sac2ascii.m | 69 ++++++ matlab/unitutils/deg2km.m | 25 +++ matlab/unitutils/deg2rad.m | 12 ++ matlab/unitutils/km2deg.m | 25 +++ matlab/unitutils/rad2deg.m | 12 ++ matlab/unitutils/second.m | 65 ++++++ matlab/unitutils/wavecut.m | 7 + python/phaseassoc/catalogConverter.py | 120 +++++++++++ python/seedconverter/inventoryconverter.py | 62 ++++++ python/seedconverter/sacutil.py | 98 +++++++++ python/seedconverter/seedconverter.py | 327 +++++++++++++++++++++++++++++ python/seedconverter/unit.py | 95 +++++++++ 32 files changed, 2354 insertions(+) create mode 100644 matlab/catalog/CatalogFilterNaN.m create mode 100644 matlab/catalog/CatalogSelectFields.m create mode 100644 matlab/catalog/CatalogSelectRange.m create mode 100644 matlab/catalog/eliminateEmptyValues.m create mode 100644 matlab/catalog/extractColumns.m create mode 100644 matlab/catalog/findCatalogColumn.m create mode 100644 
matlab/catalog/merge2Catalog.m create mode 100644 matlab/catalog/modifyCatalog.m create mode 100644 matlab/catalog/sortByTime.m create mode 100644 matlab/catalog/test create mode 100644 matlab/csvconverter/csv2catalog.m create mode 100644 matlab/csvconverter/csv2gdf.m create mode 100644 matlab/csvconverter/getTimeGroups.m create mode 100644 matlab/csvconverter/isText.m create mode 100644 matlab/csvconverter/parseTextValue.m create mode 100644 matlab/csvconverter/readAndCheckHeaders.m create mode 100644 matlab/m2m/LinReg.m create mode 100644 matlab/m2m/LinRegWrapper.m create mode 100644 matlab/m2m/LinReg_O.m create mode 100644 matlab/multipletidentification/readsac.m create mode 100644 matlab/multipletidentification/sac2ascii.m create mode 100644 matlab/unitutils/deg2km.m create mode 100644 matlab/unitutils/deg2rad.m create mode 100644 matlab/unitutils/km2deg.m create mode 100644 matlab/unitutils/rad2deg.m create mode 100644 matlab/unitutils/second.m create mode 100644 matlab/unitutils/wavecut.m create mode 100644 python/phaseassoc/catalogConverter.py create mode 100644 python/seedconverter/inventoryconverter.py create mode 100644 python/seedconverter/sacutil.py create mode 100644 python/seedconverter/seedconverter.py create mode 100644 python/seedconverter/unit.py diff --git a/matlab/catalog/CatalogFilterNaN.m b/matlab/catalog/CatalogFilterNaN.m new file mode 100644 index 0000000..3b57361 --- /dev/null +++ b/matlab/catalog/CatalogFilterNaN.m @@ -0,0 +1,40 @@ +% function [Catalog] = CatalogFilterNaN(fullcatalog,...) +% Select lines/values from catalog without NaN and empty string +% Input parameters are pairs: +% names of fields +% eg. 
CatalogSelectRange(fullcatalog,'Mw','EID') +% +% (c) Dorota Olszewska IG PAS + +function [Catalog] = CatalogFilterNaN(fullcatalog,varargin) + Catalog = fullcatalog ; + range = true(size(fullcatalog(1).val)) ; + na_varargin = numel(varargin) ; + for i= 1:na_varargin + if ~ischar(varargin{i}) + warning(['Wrong type of parameter ' num2str(i)]) + continue + end + index = find(strcmp(varargin{i},{fullcatalog.field})) ; + if length(index) ~= 1 + warning(['Can not select field ' varargin{i}]) + continue + end + if fullcatalog(index).type == 3 + for ii=1:length(Catalog(index).val) + lrange(ii,1) = ~isempty(Catalog(index).val{ii}); + end + else + lrange = ~isnan(fullcatalog(index).val) ; + %nnel = numel(lrange) ; + end + range = range & lrange; + end + for i = 1:numel(fullcatalog) + if fullcatalog(i).type == 3 + Catalog(i).val = {fullcatalog(i).val{range}}' ; + else + Catalog(i).val = fullcatalog(i).val(range) ; + end + end +end diff --git a/matlab/catalog/CatalogSelectFields.m b/matlab/catalog/CatalogSelectFields.m new file mode 100644 index 0000000..150c3f0 --- /dev/null +++ b/matlab/catalog/CatalogSelectFields.m @@ -0,0 +1,55 @@ +% function [Catalog] = CatalogSelectFields(fullcatalog,...) +% Select fields from IS-POIG catalog v2.0 +% Produce reduced IS-POIG catalog v2.0 +% Input parameters are names of fields +% eg. CatalogSelectFields(fullcatalog,'Date','Lat','Long','Mw') +% or cells with names of fields +% eg. CatalogSelectFields(fullcatalog,{'Date','Lat','Long','Mw'}) +% +% (c) Jan Wiszniowski IG PAS +% v_2.0 19-09-2016 Dorota Olszewska +% 2021-10-22: test of inputs and error messages. 
+% +function [Catalog] = CatalogSelectFields(fullcatalog,varargin) + na_varargin = length(varargin) ; + no_cols = 0 ; + for i= 1:na_varargin + if iscell(varargin{i}) + cellin = varargin{i} ; + for j= 1:numel(cellin) + if ischar(cellin{j}) + if ~any(strcmp(cellin{j},{fullcatalog.field})) + error('Field %s does not exist in the catalog', cellin{j}) + end + no_cols = no_cols+1 ; + else + error('Wrong %d{%d} input argument of the CatalogSelectFields function', j, i) + end + end + elseif ischar(varargin{i}) + if ~any(strcmp(varargin{i},{fullcatalog.field})) + error('Field %s does not exist in the catalog', varargin{i}) + end + no_cols = no_cols+1 ; + else + error('Wrong %d input of the CatalogSelectFields function', i) + end + end + Catalog = struct('field',cell(1,no_cols),'type',cell(1,no_cols),'val',cell(1,no_cols),'unit',cell(1,no_cols),'description',cell(1,no_cols),'fieldType',cell(1,no_cols)) ; + no_cols = 0 ; + for i= 1:na_varargin + if iscell(varargin{i}) + cellin = varargin{i} ; + for j= 1:numel(cellin) + if ischar(cellin{j}) + no_cols = no_cols+1 ; + Catalog(no_cols) = fullcatalog(strcmp(cellin{j},{fullcatalog.field})) ; + end + end + elseif ischar(varargin{i}) + no_cols = no_cols+1 ; + Catalog(no_cols) = fullcatalog(strcmp(varargin{i},{fullcatalog.field})) ; + end + end +end + diff --git a/matlab/catalog/CatalogSelectRange.m b/matlab/catalog/CatalogSelectRange.m new file mode 100644 index 0000000..9d5ac97 --- /dev/null +++ b/matlab/catalog/CatalogSelectRange.m @@ -0,0 +1,83 @@ +% function [Catalog] = CatalogSelectRange(fullcatalog,...) +% Select lines/values from catalog +% Input parameters are pairs: +% names of fields +% and +% range of values +% for numeric there are: [minval maxval] or [val] or {val1, val2, val3, ...} +% for strings there are: 'valtext' or {'valtext1', 'valtext2', 'valtext3', ...} +% eg. 
CatalogSelectRange(fullcatalog,'Mw',[0 4.5]) +% +% (c) Jan Wiszniowski IG PAS +% Modyfied 2016.02.04 - numerical comparison with tolerance 1e-14 + +function [Catalog] = CatalogSelectRange(fullcatalog,varargin) + Catalog = fullcatalog ; + range = true(size(fullcatalog(1).val)) ; + na_varargin = numel(varargin) ; + for i= 1:2:na_varargin-1 + if ~ischar(varargin{i}) + warning(['Wrong type of parameter ' num2str(i)]) + continue + end + index = find(strcmp(varargin{i},{fullcatalog.field})) ; + if length(index) ~= 1 + warning(['Can not select field ' varargin{i}]) + continue + end + if fullcatalog(index).type == 3 + lrange = varargin{i+1} ; + if ischar(lrange) + range = range & strcmp(lrange,fullcatalog(index).val) ; + elseif iscell(lrange) + l_range = false(size(range)) ; + for j = 1:numel(lrange) + llrange = lrange{j} ; + if ischar(llrange) + l_range = l_range | strcmp(llrange,fullcatalog(index).val) ; + end + end + range = range & l_range ; + else + warning(['Wrong type of walues for parameter ' varargin{i}]) + end + else + lrange = varargin{i+1} ; + nnel = numel(lrange) ; + if isnumeric(lrange) + tol = abs(lrange) ./ 1e15 ; + if nnel == 2 + range = range & fullcatalog(index).val >= lrange(1)-tol(1) & fullcatalog(index).val <= lrange(2)+tol(2) ; + elseif nnel == 1 + range = range & abs(fullcatalog(index).val - lrange(1)) <= tol(1) ; + end + elseif iscell(lrange) + l_range = false(size(range)) ; + for j = 1:numel(lrange) + llrange = lrange{j} ; + tol = abs(llrange) ./ 1e15 ; + if isnumeric(llrange) + if numel(llrange) == 2 + l_range = l_range | fullcatalog(index).val >= llrange(1)-tol(1) & fullcatalog(index).val <= llrange(2)+tol(2) ; + elseif numel(llrange) == 1 + l_range = l_range | abs(range & fullcatalog(index).val - llrange(1)) <= tol(1); + end + else + warning(['Wrong type of walues for parameter ' varargin{i}]) + end + end + range = range & l_range ; + else + warning(['Wrong type of walues for parameter ' varargin{i}]) + end + end + end + for i = 
1:numel(fullcatalog) + if fullcatalog(i).type == 3 + Catalog(i).val = {fullcatalog(i).val{range}}' ; + else + Catalog(i).val = fullcatalog(i).val(range) ; + end + end +end + diff --git a/matlab/catalog/eliminateEmptyValues.m b/matlab/catalog/eliminateEmptyValues.m new file mode 100644 index 0000000..1bc2a6c --- /dev/null +++ b/matlab/catalog/eliminateEmptyValues.m @@ -0,0 +1,24 @@ +% +% ----------------- +% Copyright © 2019 ACK Cyfronet AGH, Poland. +% +% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2 +% -------------- +% +function varargout = eliminateEmptyValues(varargin) + if isempty(varargin) + return; + end + indexes = 1:length(varargin{1}); + for i = 1:length(varargin) + column = varargin{i}; + if isnumeric(column) + indexes = intersect(indexes, find(~isnan(column))); + elseif iscell(column) + indexes = intersect(indexes, find(~cellfun(@isempty, column))); + end + end + for i = 1:length(varargin) + varargout{i} = varargin{i}(indexes); + end +end \ No newline at end of file diff --git a/matlab/catalog/extractColumns.m b/matlab/catalog/extractColumns.m new file mode 100644 index 0000000..c3149a7 --- /dev/null +++ b/matlab/catalog/extractColumns.m @@ -0,0 +1,25 @@ +% +% ----------------- +% Copyright © 2019 ACK Cyfronet AGH, Poland. 
+% +% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2 +% -------------- +% +function [varargout] = extractColumns(catalog, varargin) + for i=1:length(varargin) + colName = varargin{i}; + varargout{i} = findColumn(catalog, colName); + end +end + +function column = findColumn(catalog, colName) + for c=1:length(catalog) + if strcmp(catalog(c).field, colName) + column = catalog(c).val; + break; + end + end + if exist('column') ~= 1 + error('no column named %s', colName); + end +end \ No newline at end of file diff --git a/matlab/catalog/findCatalogColumn.m b/matlab/catalog/findCatalogColumn.m new file mode 100644 index 0000000..0b9c974 --- /dev/null +++ b/matlab/catalog/findCatalogColumn.m @@ -0,0 +1,19 @@ +% +% ----------------- +% Copyright © 2022 ACK Cyfronet AGH, Poland. +% -------------- +% +% Returns index of the column with the specified colName within the given catalog +% +% TODO use this function in extractColumns.m +function columnIdx = findCatalogColumn(catalog, colName) + for c=1:length(catalog) + if strcmp(catalog(c).field, colName) + columnIdx = c; + break; + end + end + if exist('columnIdx') ~= 1 + error('no column named %s', colName); + end +end \ No newline at end of file diff --git a/matlab/catalog/merge2Catalog.m b/matlab/catalog/merge2Catalog.m new file mode 100644 index 0000000..ecf4231 --- /dev/null +++ b/matlab/catalog/merge2Catalog.m @@ -0,0 +1,100 @@ +% function [Catalog] = merge2Catalog(catalog1,catalog2) +% Add values from 1st Catalog to 2nd +% +% v1.1 - 2021119 - sort according by time was added +% (c) Dorota Olszewska IG PAS + +function [Catalog] = merge2Catalog(Catalog,catalog2,ID) + +if sum(strcmp(ID,{Catalog.field}))+sum(strcmp(ID,{catalog2.field})) == 0 + error([ID,' is not field of both catalogs ', ID]) +end + +ID1=Catalog(strcmp(ID,{Catalog.field})).val; +ID2=catalog2(strcmp(ID,{catalog2.field})).val; + +if length(unique(ID1))~=length(ID1) || length(unique(ID2))~=length(ID2) + error([ID,' has not unique 
variable at both catalogs ']) +end + +no_cols = size(Catalog,2); + +for j = 1:numel(ID2) + llrange = ID2{j} ; + l_range(j) = sum(strcmp(llrange,ID1)) ; +end + +catalog2_row=CatalogSelectRange(catalog2,ID,catalog2(strcmp(ID,{catalog2.field})).val(l_range==0)); + + +% add rows by RID +no_rows_row = size(catalog2_row(1).val,1); +if no_rows_row == 0 +else +for i=1:no_cols + if Catalog(i).type == 3 + if sum(strcmp(Catalog(i).field,{catalog2_row.field})) == 0 + Catalog(i).val=[Catalog(1).val;repmat({'NaN'},no_rows_row,1)]; + else + Catalog(i).val=[Catalog(i).val;catalog2_row(strcmp(Catalog(i).field,{catalog2_row.field})).val]; + end + else + if sum(strcmp(Catalog(i).field,{catalog2_row.field})) == 0 + Catalog(i).val=[Catalog(i).val;NaN(no_rows_row,1)]; + else + Catalog(i).val=[Catalog(i).val;catalog2_row(strcmp(Catalog(i).field,{catalog2_row.field})).val]; + end + end + +end +end + +ID1=Catalog(strcmp(ID,{Catalog.field})).val; +ID2=catalog2(strcmp(ID,{catalog2.field})).val; + +no_cols = size(Catalog,2); +no_rows = size(Catalog(1).val,1); + +for j = 1:numel(ID2) + llrange = ID2{j} ; + l_range(j) = sum(strcmp(llrange,ID1)) ; +end + +catalog2_col=CatalogSelectRange(catalog2,ID,catalog2(strcmp(ID,{catalog2.field})).val(l_range==1)); + + +% add columns by RID +no_rows_col = size(catalog2_col(1).val,1); +if no_rows_col == 0 +else + +column1={Catalog.field}; +column2={catalog2_col.field}; + + +for j = 1:numel(column2) + llrange2 = column2{j} ; + l_range2(j) = sum(strcmp(llrange2,column1)) ; +end + +new_column=column2(l_range2==0); + +for k=1:numel(new_column) + Catalog(no_cols+k).field = catalog2_col(strcmp(new_column{k},{catalog2_col.field})).field; + Catalog(no_cols+k).type = catalog2_col(strcmp(new_column{k},{catalog2_col.field})).type; + Catalog(no_cols+k).unit = catalog2_col(strcmp(new_column{k},{catalog2_col.field})).unit; + Catalog(no_cols+k).description = catalog2_col(strcmp(new_column{k},{catalog2_col.field})).description; + Catalog(no_cols+k).fieldType = 
catalog2_col(strcmp(new_column{k},{catalog2_col.field})).fieldType; + for kk=1:no_rows + rr=strcmp(Catalog(strcmp(ID,{Catalog.field})).val(kk),catalog2_col(strcmp(ID,{catalog2_col.field})).val); + if sum(rr) ==0 + Catalog(no_cols+k).val(kk,1)=NaN; + else + Catalog(no_cols+k).val(kk,1) = catalog2_col(strcmp(new_column{k},{catalog2_col.field})).val(rr); + end + end +end + +Catalog = sortByTime(Catalog); + +end \ No newline at end of file diff --git a/matlab/catalog/modifyCatalog.m b/matlab/catalog/modifyCatalog.m new file mode 100644 index 0000000..3a5c873 --- /dev/null +++ b/matlab/catalog/modifyCatalog.m @@ -0,0 +1,90 @@ + % + % ----------------- + % Copyright © 2022 ACK Cyfronet AGH, Poland. + % + % Licensed under the Apache License, Version 2.0 (the "License"); + % you may not use this file except in compliance with the License. + % You may obtain a copy of the License at + % + % http://www.apache.org/licenses/LICENSE-2.0 + % + % Unless required by applicable law or agreed to in writing, software + % distributed under the License is distributed on an "AS IS" BASIS, + % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + % See the License for the specific language governing permissions and + % limitations under the License. + % + % This work was partially funded by EPOS-SP Project. + % ----------------- + % + % Modifies event with the given 'eventId' in the catalog. 
If such event was not yet present in the catalog, + % it is added at the correct position with respect to time (the event is added at the end and the resulting + % catalog is sorted by time) + % +function outCatalog = modifyCatalog(catalog, eventId, eventValues) + newColIdxs = findAbsentFields({eventValues.field}, {catalog.field}); + if ~isempty(newColIdxs) + % add columns that were not yet present in the catalog, but are specified in eventValues + outCatalog = insertColumns(catalog, eventValues, newColIdxs); + else + outCatalog = catalog; + end + idColumn = outCatalog(1).val; % assuming ID is always the first column + eventIdx = find(strcmp(idColumn, eventId)); + isNewEvent = 0; + if isempty(eventIdx) + isNewEvent = 1; + eventIdx = length(idColumn) + 1; + outCatalog(1).val(eventIdx) = eventId; + end + for e=1:length(eventValues) + colIdx = findCatalogColumn(outCatalog, eventValues(e).field); + outCatalog(colIdx).val(eventIdx) = eventValues(e).value; + end + if isNewEvent + % fill fields that were not specified in eventValues with empty values + absentColIdxs = findAbsentFields({outCatalog.field}, {eventValues.field}); + if ~isempty(absentColIdxs) + for i=2:length(absentColIdxs) % skip 1 (ID column index) + absColIdx = absentColIdxs(i); + outCatalog(absColIdx).val(eventIdx) = getEmptyFieldValue(outCatalog(absColIdx).type, 1); + end + end + end + outCatalog = sortByTime(outCatalog); +end + +% find Catalog columns that were not used in eventValues +function colIdxs = findAbsentFields(catalogFields, eventValuesFields) + colIdxs = []; + for c=1:length(catalogFields) + if ~ismember(catalogFields(c), eventValuesFields) + colIdxs(end + 1) = c; + end + end +end + +% get value that should be used as empty field for the given column type +function empty = getEmptyFieldValue(type, fieldsNo) + if (type == 3) + empty = cell(fieldsNo, 1); + else + empty = nan(fieldsNo, 1); + end +end + +function outCatalog = insertColumns(catalog, eventValues, newColIdxs) + outCatalog = catalog; 
+ rowsNo = length(catalog(1).val); + colsNo = length(catalog); + for i=1:length(newColIdxs) + newColIdx = colsNo+i; + eventFields = eventValues(newColIdxs(i)); + outCatalog(newColIdx).field = eventFields.field; + outCatalog(newColIdx).type = eventFields.type; + outCatalog(newColIdx).unit = eventFields.unit; + outCatalog(newColIdx).description = eventFields.description; + outCatalog(newColIdx).fieldType = eventFields.fieldType; + outCatalog(newColIdx).val = getEmptyFieldValue(eventFields.type, rowsNo); + end +end diff --git a/matlab/catalog/sortByTime.m b/matlab/catalog/sortByTime.m new file mode 100644 index 0000000..c680e94 --- /dev/null +++ b/matlab/catalog/sortByTime.m @@ -0,0 +1,10 @@ +function [sortedCatalog] = sortByTime(catalog) + timeColIndex = find(strcmp('Time', {catalog.field})); + if ~isempty(timeColIndex) + [~, sortedIndexes] = sortrows(catalog(timeColIndex).val); + for i=1:length(catalog) + catalog(i).val = catalog(i).val(sortedIndexes); + end + end + sortedCatalog = catalog; +end \ No newline at end of file diff --git a/matlab/catalog/test b/matlab/catalog/test new file mode 100644 index 0000000..74bd137 --- /dev/null +++ b/matlab/catalog/test @@ -0,0 +1 @@ +filterByMminAndTime \ No newline at end of file diff --git a/matlab/csvconverter/csv2catalog.m b/matlab/csvconverter/csv2catalog.m new file mode 100644 index 0000000..ad88461 --- /dev/null +++ b/matlab/csvconverter/csv2catalog.m @@ -0,0 +1,110 @@ +function [Catalog] = csv2catalog(csvFilePath, column_desc, idPrefix, sortCatalogByTime, useCsvHeader) +% DESCRIPTION: Program to create the catalogue v2.0x in the +% Matlab format file from csv file. +% INPUTS: +% - csvFilePath : path to text file with csv data. The separator between columns is defined by ‘,'. 
+% Description of the most popular catalog fields can be found at https://docs.cyfronet.pl/display/ISDOC/Catalog+-+description +% - column_desc : structure containing information how the catalog should be created +% - include: whether the field should be included in the resulting catalog +% - nameInCsv: name of the column in CSV file +% - inputType: type of the column in CSV file (REAL/INTEGER/TEXT/DATE_TIME/DATE/DATE_DAY/DATE_MONTH/DATE_YEAR/TIME) +% - inputTimeFormat: input format for reading time (if 'inputType' is one of time options) +% - inputTimeGroup: input group for merging dates +% - nameInCatalog: name of the column that should be insterted into resulting catalog +% - description: description of the column in the resulting catalog +% - format: format (display format) of the column in the resulting catalog +% - unit: unit of the column in the resulting catalog +% - fieldType: type of the column in the resulting catalog (e.g. Magnitude, Energy) +% - idPrefix : prefix of the ID column if the IDs should be generated (if the catalog doesn't contain +% the id or the column is not included ('include' is set to false) + + %TODO handle multiple time columns + %TODO the script fails if any of the rows has an empty value at the end (but empty quotes is ok) + + data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader); + colCount = length(column_desc); + noCsvHeaderModifier = 1; + if useCsvHeader + noCsvHeaderModifier = 0; + end + rowCount = length(data) / colCount - 1 + noCsvHeaderModifier; + k = 1; % column number in the generated catalog + + if ~contains_id(column_desc) + if isempty(idPrefix) + [~, idPrefix] = fileparts(csvFilePath); + end + Catalog(k).field = 'ID'; + Catalog(k).type = 3; + for row = 1 : rowCount + Catalog(k).val(row, 1) = { strcat(idPrefix, '_', num2str(row,'%04.f')) }; + end + Catalog(k).unit = []; + Catalog(k).description = 'Event ID'; + Catalog(k).fieldType = []; + k = 2; + end + + [columnsInTimeGroup, columnsNotToInclude] = 
getTimeGroups(column_desc); + + for col=1:colCount + current_col = column_desc(col); + if ~current_col.include || ismember(col, columnsNotToInclude) + continue; + end + + inputTimeGroup = current_col.inputTimeGroup; + if ~isempty(current_col.inputTimeGroup) + timeGroupArray = columnsInTimeGroup(inputTimeGroup); + for columnInGroup = 2 : length(timeGroupArray) + current_col.inputTimeFormat = [current_col.inputTimeFormat "-" column_desc(timeGroupArray(columnInGroup)).inputTimeFormat]; + end + end + + Catalog(k).field = current_col.nameInCatalog; + Catalog(k).type = current_col.format; + for row = 1 : rowCount + rawValue = data{(colCount * (row - noCsvHeaderModifier)) + col, 1}; + if ~isempty(current_col.inputTimeGroup) + timeGroupArray = columnsInTimeGroup(inputTimeGroup); + for columnInGroup = 2 : length(timeGroupArray) + rawValue = [rawValue "-" data{(colCount * (row - noCsvHeaderModifier)) + timeGroupArray(columnInGroup), 1}]; + end + end + if isempty(rawValue) + if strcmp(current_col.nameInCatalog, 'ID') + error('ID of the event cannot be empty (row: %d)', row) + elseif isText(current_col) + Catalog(k).val(row, 1) = {''}; + else + Catalog(k).val(row, 1) = NaN; + end + else + parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat); + if strcmp(current_col.format, '5a') + Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy') }; + elseif strcmp(current_col.format, '5b') + Catalog(k).val(row, 1) = { datestr(parsedValue, 'yyyy-mm') }; + else + Catalog(k).val(row, 1) = parsedValue; + end + end + end + Catalog(k).unit = current_col.unit; + Catalog(k).description = current_col.description; + Catalog(k).fieldType = current_col.fieldType; + k=k+1; + end + if sortCatalogByTime + Catalog = sortByTime(Catalog); + end +end + +function containsId = contains_id(column_desc) + idIdxs = find(strcmp(column_desc(1).nameInCatalog, 'ID')); + if isempty(idIdxs) + containsId = 0; + else + containsId = column_desc(idIdxs).include; + end +end \ No 
newline at end of file diff --git a/matlab/csvconverter/csv2gdf.m b/matlab/csvconverter/csv2gdf.m new file mode 100644 index 0000000..4b156f6 --- /dev/null +++ b/matlab/csvconverter/csv2gdf.m @@ -0,0 +1,88 @@ +% ----------------- +% Copyright © 2023 ACK Cyfronet AGH, Poland. +% ----------------- + +function [gdfFileName] = csv2gdf(csvFilePath, column_desc, description, useCsvHeader) +% DESCRIPTION: Program to create GDF files (in Matlab format) from csv file. Performs a reverse action to the +% gdf2csv.m script. +% INPUTS: +% - csvFilePath : path to text file with csv data. The separator between columns is defined by ‘,'. +% Description of the most popular GDF file formats can be found at https://docs.cyfronet.pl/display/ISDOC/GDF +% - column_desc : structure containing information how the gdf should be constructed +% - description : description written into the file + + data = readAndCheckHeaders(csvFilePath, column_desc, useCsvHeader); + colCount = length(column_desc); + noCsvHeaderModifier = 1; + if useCsvHeader + noCsvHeaderModifier = 0; + end + rowCount = length(data) / colCount - 1 + noCsvHeaderModifier; + + [FormatName] = 'GDF'; + [FormatVersion] = 2.1; + [CRS] = 'n/a'; + [TimeZone] = 'UTC'; + [Description] = description; + + [FieldDescription] = {}; + [FieldType] = {}; + [FieldUnit] = {}; + [d] = struct(); + + [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc); + colInGdf = 1; % column number in the generated gdf + for col=1:colCount + current_col = column_desc(col); + if ~current_col.include || ismember(col, columnsNotToInclude) + continue; + end + + inputTimeGroup = current_col.inputTimeGroup; + if ~isempty(current_col.inputTimeGroup) + timeGroupArray = columnsInTimeGroup(inputTimeGroup); + for columnInGroup = 2 : length(timeGroupArray) + current_col.inputTimeFormat = [current_col.inputTimeFormat "-" column_desc(timeGroupArray(columnInGroup)).inputTimeFormat]; + end + end + + fieldName = current_col.nameInCatalog; + 
FieldDescription(colInGdf, 1) = fieldName; + FieldDescription(colInGdf, 2) = current_col.description; + FieldType(colInGdf, 1) = fieldName; + FieldType(colInGdf, 2) = current_col.format; + FieldUnit(colInGdf, 1) = fieldName; + FieldUnit(colInGdf, 2) = current_col.unit; + d.(fieldName) = []; + for row = 1 : rowCount + rawValue = data{(colCount * (row - noCsvHeaderModifier)) + col, 1}; + if ~isempty(current_col.inputTimeGroup) + timeGroupArray = columnsInTimeGroup(inputTimeGroup); + for columnInGroup = 2 : length(timeGroupArray) + rawValue = [rawValue "-" data{(colCount * (row - noCsvHeaderModifier)) + timeGroupArray(columnInGroup), 1}]; + end + end + if isempty(rawValue) + if isText(current_col) + d.(fieldName)(row) = {''}; + else + d.(fieldName)(row) = NaN; + end + else + parsedValue = parseTextValue(rawValue, current_col.inputType, current_col.inputTimeFormat); + if strcmp(current_col.format, '5a') + d.(fieldName)(row) = { datestr(parsedValue, 'yyyy') }; + elseif strcmp(current_col.format, '5b') + d.(fieldName)(row) = { datestr(parsedValue, 'yyyy-mm') }; + else + d.(fieldName)(row) = parsedValue; + end + end + end + colInGdf = colInGdf + 1; + end + + [~, gdfFileName, ~] = fileparts(csvFilePath); + save(strcat(gdfFileName, '.mat'), 'FormatName', 'FormatVersion', 'CRS', 'TimeZone', 'Description', ... + 'FieldDescription', 'FieldType', 'FieldUnit', 'd', '-v7') +end \ No newline at end of file diff --git a/matlab/csvconverter/getTimeGroups.m b/matlab/csvconverter/getTimeGroups.m new file mode 100644 index 0000000..d6dd127 --- /dev/null +++ b/matlab/csvconverter/getTimeGroups.m @@ -0,0 +1,23 @@ +% ----------------- +% Copyright © 2023 ACK Cyfronet AGH, Poland. +% ----------------- +function [columnsInTimeGroup, columnsNotToInclude] = getTimeGroups(column_desc) +% DESCRIPTION: Script iterating through column_desc and returning column indexes grouped by the same +% inputTimeGroup. 
The second output is array of all the other columns indexes than first in their own respective time group +% INPUTS: +% - column_desc : structure containing definition of the CSV columns and their mapping to the final object + columnsInTimeGroup = containers.Map(); + columnsNotToInclude = []; + + for i=1:length(column_desc) + inputTimeGroup = column_desc(i).inputTimeGroup; + if ~isempty(inputTimeGroup) + if ~ismember(inputTimeGroup, columnsInTimeGroup.keys) + columnsInTimeGroup(inputTimeGroup) = [i]; + else + columnsInTimeGroup(inputTimeGroup) = cat(1, columnsInTimeGroup(inputTimeGroup), i); + columnsNotToInclude = cat(1, columnsNotToInclude, i); + end + end + end +end \ No newline at end of file diff --git a/matlab/csvconverter/isText.m b/matlab/csvconverter/isText.m new file mode 100644 index 0000000..4388acd --- /dev/null +++ b/matlab/csvconverter/isText.m @@ -0,0 +1,10 @@ +% ----------------- +% Copyright © 2023 ACK Cyfronet AGH, Poland. +% ----------------- + +function isText = isText(col_desc) +% DESCRIPTION: Function checking if given column is of text type +% INPUTS: +% - col_desc : structure containing information how the column should be constructed + isText = strcmp(col_desc.inputType, 'TEXT') | strcmp(col_desc.format, '5a') | strcmp(col_desc.format, '5b'); +end \ No newline at end of file diff --git a/matlab/csvconverter/parseTextValue.m b/matlab/csvconverter/parseTextValue.m new file mode 100644 index 0000000..77f1e23 --- /dev/null +++ b/matlab/csvconverter/parseTextValue.m @@ -0,0 +1,35 @@ +% ----------------- +% Copyright © 2023 ACK Cyfronet AGH, Poland. +% ----------------- + +function parsedValue = parseTextValue(rawValue, type, timeFormat) +% DESCRIPTION: Program that parses and returns value read from the text (a cell from a CSV file). 
+% INPUTS: +% - rawValue : value to parse +% - type : type of the value as defined by CsvColumnContentType.java +% - timeFormat : if the rawValue contains time, this format is used to parse it + + switch type + case {'REAL', 'INTEGER'} + try + parsedValue = str2num(rawValue); + catch + error('Cannot parse number input (type: %s): %s', type, rawValue); + end + if isempty(parsedValue) + % we checked if the value is empty before parsing (and such value will not be parsed), if the value is empty + % here (after parsing), it means that it was in a wrong format and could not be parsed + error('Cannot parse number input (type: %s): %s', type, rawValue); + end + case 'TEXT' + parsedValue = {rawValue}; + case 'DATE_TIME' + try + parsedValue = datenum(rawValue, timeFormat); + catch + error('Invalid input time format specification or CSV content to parse (%s)', rawValue); + end + otherwise + error('Unexpected input column type %s', type); + end +end \ No newline at end of file diff --git a/matlab/csvconverter/readAndCheckHeaders.m b/matlab/csvconverter/readAndCheckHeaders.m new file mode 100644 index 0000000..0d1438c --- /dev/null +++ b/matlab/csvconverter/readAndCheckHeaders.m @@ -0,0 +1,33 @@ +% ----------------- +% Copyright © 2023 ACK Cyfronet AGH, Poland. +% ----------------- + +function data = readAndCheckHeaders(csvFilePath, column_desc, doCheckHeaders) +% DESCRIPTION: Program that reads content from the CSV file, checking if the content matches the headers defined +% in the column_desc structure. The returned value is a cell with all values from the csv file. 
+% INPUTS: +% - csvFilePath : path to the CSV file +% - column_desc : structure containing definition of the CSV columns and their mapping to the final object + + fid = fopen(csvFilePath); + data = textscan(fid, '%q', 'Delimiter', ','){1}; % cell with all values from the csv file + fclose(fid); + if doCheckHeaders + check_headers(data, column_desc); + end +end + +function check_headers(data, column_desc) + colCount = length(column_desc); + headers = data(1:colCount); + for i=1:colCount + if ~strcmp(column_desc(i).nameInCsv, headers(i)) + error('Expected column %s, but found %s in CSV headers', column_desc(i).nameInCsv, char(headers(i))); + end + end + + if mod(length(data), colCount) ~= 0 + error('Improper number of values in one of the rows'); + end + +end \ No newline at end of file diff --git a/matlab/m2m/LinReg.m b/matlab/m2m/LinReg.m new file mode 100644 index 0000000..b9bac6f --- /dev/null +++ b/matlab/m2m/LinReg.m @@ -0,0 +1,186 @@ +% function [msyn,a,b,sa,sb,cc,rms,N]=LinReg(m1,m2,opt,n) +% ----------------------------------------------------------------------------------------------- +% The program calculates the linear regression parameters (a and b) +% and their uncertainties by applying different regression techniques. +% ----- +% The program calls the following functions, appended in the scipt: +% function [a,b,sa,sb,rms,N]=GOR(m1,m2,n) - General Orthogonal Regression +% function [a b sa sb rms]=OLS(m1,m2) - Ordinary Least Squares +% function [a b sa sb rms]=ILS(m1,m2) - Inverted Least Squares +% ----------------------------------------------------------------------------------------------- +% COMPLIED BY: +% Leptokaropoulos Konstantinos, June 2014, (kleptoka@igf.edu.pl) within IS-EPOS Project, +% ------------------------------------------------------------------------------------------------ +% DESCRIPTION +% The program is used to establish relationships between different magnitude scales. 
+% These relationships can then be used for converting diverse magnitude scales into a
+% common one, such that a homogeneous magnitude is applied for further analyses.
+% The suggested method is GOR, because the ordinary least-squares assumes that
+% there are no uncertainties in the values of the independent variable, a condition that
+% is never true when dealing with earthquake magnitudes. This may introduce
+% systematic errors in magnitude conversion, apparent catalog incompleteness, and
+% significant bias in the estimates of the b-value. According to GOR technique, the
+% projection of the independent variable is done along a weighted orthogonal distance
+% from the linear fitting curve. Although the error variance ratio of the two variables
+% is introduced in calculations, Castellaro et al. (2006) showed that even if the applied
+% values differ from the real ones, GOR method still performs better than the OLS.
+% Despite the GOR superiority, OLS and ILS are also provided as supplementary
+% options in the present program, for comparison.
+% ------------------------------------------------------------------------------------------------
+% INPUT PARAMETERS:
+% m1 - variable 1 (e.g. magnitude 1) [VECTOR]
+% m2 - variable 2 (e.g. 
magnitude 2) [VECTOR] +% opt - regression method selection (1,2 or 3): [SCALAR] (Default=1) +% 1 - General Orthogonal Regression - GOR function +% 2 - Ordinary Least Squares - OLS function +% 3 - Inverted Least Squares - ILS function +% n - error variance ratio (var(m2)/var(m1)) [SCALAR] (FOR GOR only, Default=1) +% ------------------------------------------------------------------------------------------------ +% OUTPUT: +% msyn - Converted Magnitude ([VECTOR] +% a - intercept [SCALAR] +% b - slope [SCALAR] +% sa - 95% confidence interval for intercept [SCALAR] +% sb - 95% confidence interval for slope [SCALAR] +% cc - Pearson correlation coefficient between vectors m1 and m2 [SCALAR-optional] +% rms - root mean square error [SCALAR-optional] +% N - number of data pairs m1,m2 [SCALAR-optional] +% --------------------------------------------------------------------------------------------- +% REFERENCES +% **** (for General Orthogonal Regression) **** +% Fuller,W. A. (1987). Measurement Error Models,Wiley, New York, 440 pp. +% Castellaro, S., F. Mulargia, and Y. Y. Kagan (2006). Regression problems for magnitudes, +% Geophys. J. Int. 165, 913–930. +% Lolli, B., and P. Gasperini (2012). A comparison among general orthogonal regression methods +% applied to earthquake magnitude conversions, Geophys. J. Int. 190, 1135–1151. +% Wason, H. R., R. Das, and M. L. Sharma (2012).Magnitude conversion problem using general +% orthogonal regression, Geophys. J. Int. 190, 1091–1096. +% ---------------------------------------------------------------------------------------------- +% LICENSE +% This file is a part of the IS-EPOS e-PLATFORM. +% +% This is free software: you can redistribute it and/or modify it under +% the terms of the GNU General Public License as published by the Free +% Software Foundation, either version 3 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . +% ------------------------------------------------------------------------------------------------ + +function [msyn,a,b,sa,sb,cc,rms,N]=LinReg(m1,m2,opt,n) + +%%%%% Remove NaN values - K03JUN2015 %%%%% +mm1=m1(isnan(m1)==0 & isnan(m2)==0); +mm2=m2(isnan(m1)==0 & isnan(m2)==0); +m1=mm1;m2=mm2; +%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +if nargin==2, opt=1; n=1; +elseif nargin==3 , n=1;end + +% CC +cc=corrcoef(m1,m2); +cc=cc(2); +N=length(m1); + +if opt==1 +[a,b,sa,sb,rms]=GOR(m1,m2,n); +elseif opt==2 +[a,b,sa,sb,rms]=OLS(m1,m2); +elseif opt==3 +[a,b,sa,sb,rms]=ILS(m1,m2); +end +msyn=m1*b+a; +end + +% ******************************************************** +% ------------------------- GOR -------------------------------- +function [a,b,sa,sb,rms,N]=GOR(m1,m2,n) + +if nargin==2 || isempty(n),n=1;end + +%GOR parameters +f=1; %alternative f=0 +cova=cov(m1,m2,f); +mx=var(m1,f); +my=var(m2,f); +mxy=cova(1,2); + +b=(my-n*mx+sqrt((my-n*mx)^2+4*n*mxy^2))/(2*mxy); + a=mean(m2)-b*mean(m1); + + % BASIC STATISTICS +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +N=length(m1); + + % Confidence Intervals of a and b-values +d=(my-n*mx); + +sx=(sqrt(d^2+4*n*mxy^2)-d)/(2*n); +sy=(my+n*mx-sqrt(d^2+4*n*mxy^2))/(2*n); + +s=((N-1)*(n+b^2)*sy)/(N-2); + +Vb=(mx*s-(b*sy)^2)/((N-1)*sx^2); +Va=s/N+Vb*mean(m1)^2; + +sb=sqrt(Vb)*tinv(0.975,N-2); +sa=sqrt(Va)*tinv(0.975,N-2); +end + +% ----------------------------- OLS ---------------------------- +function [a b sa sb rms]=OLS(m1,m2) +f=1;N=length(m1); +cova=cov(m1,m2,f); +mx=var(m1,f); +my=var(m2,f); +mxy=cova(1,2); + +p=polyfit(m1,m2,1); + +vb=my/sum((m1-mean(m1)).^2); 
+va=my*sum(m1.^2)/(N*sum((m1-mean(m1)).^2)); +sb=tinv(0.975,N-2)*sqrt(vb); +sa=tinv(0.975,N-2)*sqrt(va); +b=p(1);a=p(2); + +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +end + +% ----------------------------- ILS ---------------------------- +function [a b sa sb rms]=ILS(m1,m2) +f=1;N=length(m1); +cova=cov(m1,m2,f); +mx=var(m2,f); +my=var(m1,f); +mxy=cova(1,2); + +p=polyfit(m2,m1,1); +q=[1/p(1) -p(2)/p(1)]; + + +vb=my/sum((m2-mean(m2)).^2); +va=my*sum(m2.^2)/(N*sum((m2-mean(m2)).^2)); +sb=tinv(0.975,N-2)*sqrt(vb); +sa=tinv(0.975,N-2)*sqrt(va); + +sq1=abs(sb*q(1)/p(1)); +sq2=abs(sb*q(2)/p(2)); +b=q(1);a=q(2);sb=sq1;sa=sq2; + +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +end + + + + diff --git a/matlab/m2m/LinRegWrapper.m b/matlab/m2m/LinRegWrapper.m new file mode 100644 index 0000000..ad41f2a --- /dev/null +++ b/matlab/m2m/LinRegWrapper.m @@ -0,0 +1,32 @@ +% +% ----------------- +% Copyright © 2019 ACK Cyfronet AGH, Poland. +% +% This work was partially funded by EPOS Project funded in frame of PL-POIR4.2 +% -------------- +% +function [msyn, outData, m1, m2] = LinRegWrapper(catalog, m1Type, m2Type, opt, n) + [m1, m2] = extractColumns(catalog, m1Type, m2Type); + [m1, m2] = eliminateEmptyValues(m1, m2); + if isLogarithmComparableToMagnitude(m1Type); m1 = log10(m1); end + if isLogarithmComparableToMagnitude(m2Type); m2 = log10(m2); end + + if exist('OCTAVE_VERSION', 'builtin') > 0 + [msyn, a, b, sa, sb, cc, rms, N] = LinReg_O(m1, m2, opt, n); + else + [msyn, a, b, sa, sb, cc, rms, N] = LinReg(m1, m2, opt, n); + end + + outData.a = a; + outData.b = b; + outData.sa = sa; + outData.sb = sb; + outData.cc = cc; + outData.rms = rms; + outData.N = N; +end + +function result = isLogarithmComparableToMagnitude(columnType) + result = strcmp(columnType, 'E') || strcmp(columnType, 'M0'); +end + diff --git a/matlab/m2m/LinReg_O.m b/matlab/m2m/LinReg_O.m new file mode 100644 index 0000000..00f6f55 --- /dev/null +++ b/matlab/m2m/LinReg_O.m @@ -0,0 +1,186 @@ +% 
function [msyn,a,b,sa,sb,cc,rms,N]=LinReg_O(m1,m2,opt,n) - Octave Compatible Version
+% -----------------------------------------------------------------------------------------------
+% The program calculates the linear regression parameters (a and b)
+% and their uncertainties by applying different regression techniques.
+% -----
+% The program calls the following functions, appended in the script:
+% function [a,b,sa,sb,rms,N]=GOR(m1,m2,n) - General Orthogonal Regression
+% function [a b sa sb rms]=OLS(m1,m2) - Ordinary Least Squares
+% function [a b sa sb rms]=ILS(m1,m2) - Inverted Least Squares
+% -----------------------------------------------------------------------------------------------
+% COMPILED BY:
+% Leptokaropoulos Konstantinos, June 2014, (kleptoka@igf.edu.pl) within IS-EPOS Project,
+% ------------------------------------------------------------------------------------------------
+% DESCRIPTION
+% The program is used to establish relationships between different magnitude scales.
+% These relationships can then be used for converting diverse magnitude scales into a
+% common one, such that a homogeneous magnitude is applied for further analyses.
+% The suggested method is GOR, because the ordinary least-squares assumes that
+% there are no uncertainties in the values of the independent variable, a condition that
+% is never true when dealing with earthquake magnitudes. This may introduce
+% systematic errors in magnitude conversion, apparent catalog incompleteness, and
+% significant bias in the estimates of the b-value. According to GOR technique, the
+% projection of the independent variable is done along a weighted orthogonal distance
+% from the linear fitting curve. Although the error variance ratio of the two variables
+% is introduced in calculations, Castellaro et al. (2006) showed that even if the applied
+% values differ from the real ones, GOR method still performs better than the OLS. 
+% Despite the GOR superiority, OLS and ILS are also provided as supplementary
+% options in the present program, for comparison.
+% ------------------------------------------------------------------------------------------------
+% INPUT PARAMETERS:
+% m1 - variable 1 (e.g. magnitude 1) [VECTOR]
+% m2 - variable 2 (e.g. magnitude 2) [VECTOR]
+% opt - regression method selection (1,2 or 3): [SCALAR] (Default=1)
+% 1 - General Orthogonal Regression - GOR function
+% 2 - Ordinary Least Squares - OLS function
+% 3 - Inverted Least Squares - ILS function
+% n - error variance ratio (var(m2)/var(m1)) [SCALAR] (FOR GOR only, Default=1)
+% ------------------------------------------------------------------------------------------------
+% OUTPUT:
+% msyn - Converted Magnitude [VECTOR]
+% a - intercept [SCALAR]
+% b - slope [SCALAR]
+% sa - 95% confidence interval for intercept [SCALAR]
+% sb - 95% confidence interval for slope [SCALAR]
+% cc - Pearson correlation coefficient between vectors m1 and m2 [SCALAR-optional]
+% rms - root mean square error [SCALAR-optional]
+% N - number of data pairs m1,m2 [SCALAR-optional]
+% ---------------------------------------------------------------------------------------------
+% REFERENCES
+% **** (for General Orthogonal Regression) ****
+% Fuller, W. A. (1987). Measurement Error Models, Wiley, New York, 440 pp.
+% Castellaro, S., F. Mulargia, and Y. Y. Kagan (2006). Regression problems for magnitudes,
+% Geophys. J. Int. 165, 913–930.
+% Lolli, B., and P. Gasperini (2012). A comparison among general orthogonal regression methods
+% applied to earthquake magnitude conversions, Geophys. J. Int. 190, 1135–1151.
+% Wason, H. R., R. Das, and M. L. Sharma (2012). Magnitude conversion problem using general
+% orthogonal regression, Geophys. J. Int. 190, 1091–1096.
+% ----------------------------------------------------------------------------------------------
+% LICENSE
+% This file is a part of the IS-EPOS e-PLATFORM. 
+% +% This is free software: you can redistribute it and/or modify it under +% the terms of the GNU General Public License as published by the Free +% Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . +% ------------------------------------------------------------------------------------------------ + +function [msyn,a,b,sa,sb,cc,rms,N]=LinReg_O(m1,m2,opt,n) + +%%%%% Remove NaN values - K03JUN2015 %%%%% +mm1=m1(isnan(m1)==0 & isnan(m2)==0); +mm2=m2(isnan(m1)==0 & isnan(m2)==0); +m1=mm1;m2=mm2; +%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if nargin==2, opt=1; n=1; +elseif nargin==3 , n=1;end + +% CC +cc=corr(m1,m2); +N=length(m1); + +if opt==1 +[a,b,sa,sb,rms]=GOR(m1,m2,n); +elseif opt==2 +[a,b,sa,sb,rms]=OLS(m1,m2); +elseif opt==3 +[a,b,sa,sb,rms]=ILS(m1,m2); +end +msyn=m1*b+a; +end + +% ******************************************************** +% ------------------------- GOR -------------------------------- +function [a,b,sa,sb,rms,N]=GOR(m1,m2,n) + +if nargin==2 || isempty(n),n=1;end + +%GOR parameters +f=1; %alternative f=0 +mx=var(m1,f); +my=var(m2,f); +mxy=cov(m1,m2,f); +%mxy=mxy(2); Enable for Matlab version + +b=(my-n*mx+sqrt((my-n*mx)^2+4*n*mxy^2))/(2*mxy); + a=mean(m2)-b*mean(m1); + + % BASIC STATISTICS +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +N=length(m1); + + % Confidence Intervals of a and b-values +d=(my-n*mx); + +sx=(sqrt(d^2+4*n*mxy^2)-d)/(2*n); +sy=(my+n*mx-sqrt(d^2+4*n*mxy^2))/(2*n); + +s=((N-1)*(n+b^2)*sy)/(N-2); + +Vb=(mx*s-(b*sy)^2)/((N-1)*sx^2); +Va=s/N+Vb*mean(m1)^2; + +sb=sqrt(Vb)*tinv(0.975,N-2); +sa=sqrt(Va)*tinv(0.975,N-2); +end + +% 
----------------------------- OLS ---------------------------- +function [a b sa sb rms]=OLS(m1,m2) +f=1;N=length(m1); +mxy=cov(m1,m2,f); +mx=var(m1,f); +my=var(m2,f); + +p=polyfit(m1,m2,1); + +vb=my/sum((m1-mean(m1)).^2); +va=my*sum(m1.^2)/(N*sum((m1-mean(m1)).^2)); +sb=tinv(0.975,N-2)*sqrt(vb); +sa=tinv(0.975,N-2)*sqrt(va); +b=p(1);a=p(2); + +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +end + +% ----------------------------- ILS ---------------------------- +function [a b sa sb rms]=ILS(m1,m2) +f=1;N=length(m1); +mxy=cov(m1,m2,f); +mx=var(m1,f); +my=var(m2,f); + +p=polyfit(m2,m1,1); +q=[1/p(1) -p(2)/p(1)]; + + +vb=my/sum((m2-mean(m2)).^2); +va=my*sum(m2.^2)/(N*sum((m2-mean(m2)).^2)); +sb=tinv(0.975,N-2)*sqrt(vb); +sa=tinv(0.975,N-2)*sqrt(va); + +sq1=abs(sb*q(1)/p(1)); +sq2=abs(sb*q(2)/p(2)); +b=q(1);a=q(2);sb=sq1;sa=sq2; + +msyn=b*m1+a; +rms=sqrt(sum((m2-msyn).^2)/(length(m2))); +end + +% Testing plotting +%plot(c.Ml,c.Mw,'ko') +%hold on +%plot([min(c.Ml) max(c.Ml)],[min(c.Ml)*b+a max(c.Ml)*b+a],'r--') + + + diff --git a/matlab/multipletidentification/readsac.m b/matlab/multipletidentification/readsac.m new file mode 100644 index 0000000..7b7c0ed --- /dev/null +++ b/matlab/multipletidentification/readsac.m @@ -0,0 +1,287 @@ +function F=readsac(files) +% F=readsac('files') +% +% Read a SAC-file or a set of SAC-files. +% +% Input : +% 'files' corresponds either to the complete name of one SAC-file, +% either to a set of SAC-files with the logical expression *. +% Output : +% F is a structure (length equal to the number of SAC-files read). +% It contains ALL the fields of the SAC's header and the trace in +% the field "trace". +% +% EXAMPLE : +% F=readsac('2004.02.23-17.31.00.ABH.SHZ.SAC') +% reads the file 2004.02.23-17.31.00.ABH.SHZ.SAC and saves it into +% the structure F (dimension: 1*1). +% F=readsac('Data/*BHZ*.SAC') +% reads all the files of the form *BHZ*.SAC in the directory "Data". +% The size of F equals the number of these files. 
+% +% From J. Vergne and G. Hetenyi + + +%------------------------------------------------------------------ +% By default, the signals are read in mode 'r' +modlect='r'; + +% Determine the type of "files" +rep_files=fileparts(files); +list_files=dir(files); + +if length(list_files)==0 +% disp('"readsac": File(s) do not exist'); + F.tau=-1; +else + +for ifile=1:length(list_files) + + nomfich=fullfile(rep_files,list_files(ifile).name); + clear dsac; + + % Read signals + % ------------ + % Following the signal type, reading procedure can be different: + % 'l' - IEEE floating point with little-endian byte ordering + % 'b' - IEEE floating point with big-endian byte ordering + + [C,MAXSIZE,ENDIAN]=computer; + if ENDIAN=='L' bool_l=0; + else bool_l=1; + end + + fidev=fopen(nomfich,modlect,'l'); +if fidev > 0 + + h1=fread(fidev,70,'float'); % -------------- + h2=fread(fidev,40,'long'); % reading header + h3=fread(fidev,192,'uchar'); % -------------- + + % testing byte-order, h2(7) must! be 6. + if h2(7)~=6 + bool_l=1; + fclose(fidev); + fidev=fopen(nomfich,modlect,'b'); + h1=fread(fidev,70,'float'); + h2=fread(fidev,40,'long'); + h3=fread(fidev,192,'uchar'); + end + + % reading trace + tamp=fread(fidev,inf,'float'); + + dsac.h1=h1; + dsac.h2=h2; + dsac.h3=h3; + + + % PART 1: reading float-type parameters + % ------------------------------------------ + % %- comment are from original version, + % % comments ares from SAC-homepage + + dsac.delta=h1(1); %- sampling time interval + dsac.depmin=h1(2); % minimum value of dependent variable + dsac.depmax=h1(3); % maximum value of dependent variable + dsac.scale=h1(4); % multiplying scale factor for dependent variable (not currently used) + dsac.odelta=h1(5); % observed increment if different from nominal value + + dsac.b=h1(6); %- begin time (d�calage du 1er �chantillon) (beginning value of independent variable) + dsac.e=h1(7); % ending value of independent variable + dsac.o=h1(8); %- event origin time (seconds relative to 
reference time) + dsac.a=h1(9); % first arrival time (seconds relative to reference time) + dsac.internal10=h1(10); % INTERNAL + + dsac.t0=h1(11); %- user defined time picks or markers + dsac.t1=h1(12); %- (seconds relative to reference time) + dsac.t2=h1(13); %- + dsac.t3=h1(14); %- + dsac.t4=h1(15); %- + dsac.t5=h1(16); % + dsac.t6=h1(17); % + dsac.t7=h1(18); % + dsac.t8=h1(19); % + dsac.t9=h1(20); % + + dsac.f=h1(21); % fini or end of event time (seconds relative to reference time) + dsac.resp0=h1(22); % instrument response parameters (not currently used) + dsac.resp1=h1(23); % + dsac.resp2=h1(24); % + dsac.resp3=h1(25); % + dsac.resp4=h1(26); % + dsac.resp5=h1(27); % + dsac.resp6=h1(28); % + dsac.resp7=h1(29); % + dsac.resp8=h1(30); % + dsac.resp9=h1(31); % + + + dsac.stla=h1(32); %- station latitude (degrees, north positive) + dsac.stlo=h1(33); %- station longitude (degrees, east positive) + dsac.stel=h1(34); %- station elevation (meters) + dsac.stdp=h1(35); % station depth below surface (meters)(not currently used) + + dsac.evla=h1(36); %- event latitude (degrees, north positive) + dsac.evlo=h1(37); %- event longitude (degrees, east positive) + dsac.evel=h1(38); % event elevation (meters)(not currently used) + dsac.evdp=h1(39); %- event depth below surface (meters) + dsac.mag=h1(40); % event magnitude + + + dsac.user0=h1(41); %- used defined variable storage area, floating! 
+ dsac.user1=h1(42); %- + dsac.user2=h1(43); %- + dsac.user3=h1(44); % + dsac.user4=h1(45); % + dsac.user5=h1(46); % + dsac.user6=h1(47); % + dsac.user7=h1(48); % + dsac.user8=h1(49); % + dsac.user9=h1(50); % + + dsac.dist=h1(51); %- station to event distance (km) + dsac.az=h1(52); %- event to station azimuth (degrees) + dsac.baz=h1(53); %- station to event azimuth (degrees) + dsac.gcarc=h1(54); %- station to event great circle arc length (degrees) + dsac.internal55=h1(55); % INTERNAL + + dsac.internal56=h1(56); % INTERNAL + dsac.depmen=h1(57); % mean value of dependent variable + dsac.cmpaz=h1(58); %- component azimuth (degrees clockwise from north) + dsac.cmpinc=h1(59); %- component incidence angle (degrees from vertical) + + dsac.xminimum=h1(60); % minimum value of X (spectral files only) + dsac.xmaximum=h1(61); % maximum value of X (spectral files only) + dsac.yminimum=h1(62); % minimum value of Y (spectral files only) + dsac.ymaximum=h1(63); % maximum value of Y (spectral files only) + dsac.unused64=h1(64); % UNUSED + dsac.unused65=h1(65); % UNUSED + dsac.unused66=h1(66); % UNUSED + dsac.unused67=h1(67); % UNUSED + dsac.unused68=h1(68); % UNUSED + dsac.unused69=h1(69); % UNUSED + dsac.unused70=h1(70); % UNUSED + + + % PART 2: reading long-type parameters + % ------------------------------------ + + % GMT time corresponding to reference (0) time in file + dsac.nzyear=h2(1); %- year + dsac.nzjday=h2(2); %- julian day + dsac.nzhour=h2(3); %- hour + dsac.nzmin=h2(4); %- minute + dsac.nzsec=h2(5); % second + dsac.nzmsec=h2(6); % millisecond + dsac.sec=h2(5)+h2(6)/1000; %- full second (NOT an original SAC-attribute!) + + dsac.nvhdr=h2(7); % header version number: 6! 
+ dsac.norid=h2(8); % origin ID (CSS 3.0) + dsac.nevid=h2(9); % event ID (CSS 3.0) + + dsac.npts=h2(10); %- number of points per data component + + dsac.internal81=h2(11);% INTERNAL + dsac.nwfid=h2(12); % waveform ID (CSS 3.0) + dsac.nxsize=h2(13); % spectral length (spectral files only) + dsac.nysize=h2(14); % spectral width (spectral files only) + dsac.unused85=h2(15); % UNUSED + + dsac.iftype=h2(16); % type of file (required)(see SAC-page) + dsac.idep=h2(17); % type of dependent variable (see SAC-page) + dsac.iztype=h2(18); % reference time equivalence (see SAC-page) + dsac.unused89=h2(19); % UNUSED + dsac.iinst=h2(20); % type of recording instrument (not currently used) + + dsac.istreg=h2(21); % station geogrpahic region (not currently used) + dsac.ievreg=h2(22); % event geographic location (not currently used) + dsac.ievtyp=h2(23); % type of event (see SAC-page) + dsac.iqual=h2(24); % quality of data (not currently used)(see SAC-page) + dsac.isynth=h2(25); % synthetic data flag (not currently used) + + dsac.imagtyp=h2(26); % magnitude type + dsac.imagsrc=h2(27); % source of magnitude information + dsac.unused98=h2(28); % UNUSED + dsac.unused99=h2(29); % UNUSED + dsac.unused100=h2(30); % UNUSED + + dsac.unused101=h2(31); % UNUSED + dsac.unused102=h2(32); % UNUSED + dsac.unused103=h2(33); % UNUSED + dsac.unused104=h2(34); % UNUSED + dsac.unused105=h2(35); % UNUSED + + dsac.leven=h2(36); % TRUE if data is evenly spaced + dsac.lspol=h2(37); % TRUE if station components have positive polarity (left-hand rule) + dsac.lovrok=h2(38); % TRUE if it is okay to overwrite this file on disk + dsac.lcalda=h2(39); % TRUE if DIST,AZ,BAZ and GCARC are to be calculated form station and event coordinates + dsac.unused110=h2(40); % UNUSED + + + % PART 3: reading uchar-type parameters + % ------------------------------------- + + imin1=min(find(h3(1:8)==0 | h3(1:8)==32)); + imin2=min(find(h3(9:24)==0 | h3(9:24)==32)); + + dsac.kstnm=rm_blanc(h3(1:1+imin1-1))'; %- station name + 
dsac.kevnm=rm_blanc(h3(9:9+imin2-1))'; %- event name + + dsac.khole=rm_blanc(h3(25:32))'; % hole identification if nuclear event + dsac.ko=rm_blanc(h3(33:40))'; % event origin time identification + dsac.ka=rm_blanc(h3(41:48))'; % first arrival time identification + + dsac.kt0=rm_blanc(h3(49:56))'; %- user defined time pick identifications, h1(11:20) + dsac.kt1=rm_blanc(h3(57:64))'; %- + dsac.kt2=rm_blanc(h3(65:72))'; %- + dsac.kt3=rm_blanc(h3(73:80))'; %- + dsac.kt4=rm_blanc(h3(81:88))'; %- + dsac.kt5=rm_blanc(h3(89:96))'; % + dsac.kt6=rm_blanc(h3(97:104))'; % + dsac.kt7=rm_blanc(h3(105:112))'; % + dsac.kt8=rm_blanc(h3(113:120))'; % + dsac.kt9=rm_blanc(h3(121:128))'; % + + dsac.kf=rm_blanc(h3(129:136))'; % fini identification + + dsac.kuser0=rm_blanc(h3(137:144))'; %- user defined variable storage area + dsac.kuser1=rm_blanc(h3(145:152))'; %- + dsac.kuser2=rm_blanc(h3(153:160))'; %- + + dsac.kcmpnm=rm_blanc(h3(161:168))'; %- component name + dsac.knetwk=rm_blanc(h3(169:176))'; % name of seismic network + dsac.kdatrd=rm_blanc(h3(177:184))'; % date data was read onto computer + + dsac.kinst=rm_blanc(h3(185:192))'; %- generic name of recording instrument + + + % reading trace + % ------------- + + dsac.trace=tamp; + + dsac.tau = 1; % Added to check if the file exist + fclose(fidev); + +else + %disp(['"readsac": file ' nomfich ' : reading error - file does not exist']) + dsac.tau=-1; +end + F(ifile)=dsac; +end + + +end + +% ------------------------------------------------------------- + +function ch1=rm_blanc(ch) + +% looks for whitespace in 'ch' and removes them + if ischar(ch) + ch1=ch(find(double(ch)~=32) & double(ch)~=0); + else + ch1=ch(find(ch~=32 & ch~=0)); + ch1=char(ch1); + end diff --git a/matlab/multipletidentification/sac2ascii.m b/matlab/multipletidentification/sac2ascii.m new file mode 100644 index 0000000..f7405b9 --- /dev/null +++ b/matlab/multipletidentification/sac2ascii.m @@ -0,0 +1,69 @@ +function [] = sac2ascii(sac, asciiFilePath) +%sac2ascii Saves 
given sac data in ascii file in slist format + + % check required fields + checkValue(sac.knetwk, 'knetwk'); + checkValue(sac.kstnm, 'kstnm'); + checkValue(sac.kcmpnm, 'kcmpnm'); + checkValue(sac.delta, 'delta'); + checkValue(sac.nzyear, 'nzyear'); + checkValue(sac.nzjday, 'nzjday'); + checkValue(sac.nzhour, 'nzhour'); + checkValue(sac.nzmin, 'nzmin'); + checkValue(sac.sec, 'sec'); + + networkCode = sac.knetwk; + stationCode = sac.kstnm; + locationCode = sac.khole; + if isUndefined(locationCode); locationCode = ''; end + channelCode = sac.kcmpnm; + frequency = int32(1 / sac.delta); + unit = extractUnit(sac); + startTime = extractStartTime(sac); + samples = length(sac.trace); + + fileID = fopen(asciiFilePath, 'w'); + fprintf(fileID, 'TIMESERIES %s_%s_%s_%s, %d samples, %d sps, %s, SLIST, FLOAT, %s\n',... + networkCode, stationCode, locationCode, channelCode, samples, frequency, startTime, unit); + fprintf(fileID, '%f\n', sac.trace); + fclose(fileID); + +end + +function [unit] = extractUnit(sac) + switch sac.idep + case 6 + unit = 'NM'; + case 7 + unit = 'NM/S'; + case 8 + unit = 'NM/S^2'; + otherwise + unit = 'COUNTS'; + end +end + +function [startTime] = extractStartTime(sac) + % converting day of the year to day of the month + yearString = ['01-01-', num2str(sac.nzyear)]; + date = datenum(yearString, 'dd-mm-yyyy') + sac.nzjday - 1; + dateString = datestr(date, 'yyyy-mm-dd'); + + startTime = sprintf('%sT%02d:%02d:%09.6f', dateString, sac.nzhour, sac.nzmin, sac.sec); +end + +function [] = checkValue(value, fieldName) + if isUndefined(value) + error('sac does not contain required field - [%s]', fieldName); + end +end + +function [undefined] = isUndefined(value) + if isa(value, 'char') + undefined = strcmp(value, '-12345'); + elseif isa(value, 'logical') + undefined = value == false; + else + undefined = abs(-12345 - value) < 1e-10; + end +end diff --git a/matlab/unitutils/deg2km.m b/matlab/unitutils/deg2km.m new file mode 100644 index 0000000..3aea027 --- /dev/null 
+++ b/matlab/unitutils/deg2km.m @@ -0,0 +1,25 @@ +function km = deg2km(deg) +%DEG2KM Convert distance from degrees to kilometers +% +% KM = DEG2KM(DEG) converts distances from degrees to kilometers as +% measured along a great circle on a sphere with a radius of 6371 km, the +% mean radius of the Earth. +% +% KM = DEG2KM(DEG,RADIUS) converts distances from degrees to kilometers +% as measured along a great circle on a sphere having the specified +% radius. RADIUS must be in units of kilometers. +% +% KM = DEG2KM(DEG,SPHERE) converts distances from degrees to kilometers, +% as measured along a great circle on a sphere approximating an object in +% the Solar System. SPHERE may be one of the following strings: 'sun', +% 'moon', 'mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', +% 'uranus', 'neptune', or 'pluto', and is case-insensitive. +% +% See also DEG2NM, DEGTORAD, DEG2SM, KM2DEG. + +% Copyright 1996-2009 The MathWorks, Inc. +% $Revision: 1.10.4.4 $ $Date: 2009/03/30 23:38:09 $ + +rad = deg2rad(deg); + +km = rad*6371; diff --git a/matlab/unitutils/deg2rad.m b/matlab/unitutils/deg2rad.m new file mode 100644 index 0000000..16420b6 --- /dev/null +++ b/matlab/unitutils/deg2rad.m @@ -0,0 +1,12 @@ +function angleInRadians = deg2rad(angleInDegrees) +% DEG2RAD Convert angles from degrees to radians +% +% DEG2RAD has been replaced by DEGTORAD. +% +% angleInRadians = DEG2RAD(angleInDegrees) converts angle units from +% degrees to radians. + +% Copyright 2007-2009 The MathWorks, Inc. 
+% $Revision: 1.9.4.5 $ $Date: 2009/04/15 23:16:12 $ + +angleInRadians = (pi/180) * angleInDegrees; diff --git a/matlab/unitutils/km2deg.m b/matlab/unitutils/km2deg.m new file mode 100644 index 0000000..7002f9f --- /dev/null +++ b/matlab/unitutils/km2deg.m @@ -0,0 +1,25 @@ +function deg = km2deg(km) +%KM2DEG Convert distance from kilometers to degrees +% +% DEG = KM2DEG(KM) converts distances from kilometers to degrees as +% measured along a great circle on a sphere with a radius of 6371 km, the +% mean radius of the Earth. +% +% DEG = KM2DEG(KM,RADIUS) converts distances from kilometers to degrees +% as measured along a great circle on a sphere having the specified +% radius. RADIUS must be in units of kilometers. +% +% DEG = KM2DEG(KM,SPHERE) converts distances from kilometers to degrees, +% as measured along a great circle on a sphere approximating an object in +% the Solar System. SPHERE may be one of the following strings: 'sun', +% 'moon', 'mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', +% 'uranus', 'neptune', or 'pluto', and is case-insensitive. +% +% See also DEG2KM, KM2RAD, KM2NM, KM2SM. + +% Copyright 1996-2009 The MathWorks, Inc. +% $Revision: 1.10.4.4 $ $Date: 2009/03/30 23:38:30 $ + +rad = km/6371; + +deg = rad2deg(rad); diff --git a/matlab/unitutils/rad2deg.m b/matlab/unitutils/rad2deg.m new file mode 100644 index 0000000..a13d909 --- /dev/null +++ b/matlab/unitutils/rad2deg.m @@ -0,0 +1,12 @@ +function angleInDegrees = rad2deg(angleInRadians) +% RAD2DEG Convert angles from radians to degrees +% +% RAD2DEG has been replaced by RADTODEG. +% +% angleInDegrees = RAD2DEG(angleInRadians) converts angle units from +% radians to degrees. + +% Copyright 2007-2008 The MathWorks, Inc. 
+% $Revision: 1.9.4.5 $ $Date: 2009/04/15 23:16:46 $ + +angleInDegrees = (180/pi) * angleInRadians; diff --git a/matlab/unitutils/second.m b/matlab/unitutils/second.m new file mode 100644 index 0000000..0196a9d --- /dev/null +++ b/matlab/unitutils/second.m @@ -0,0 +1,65 @@ +function s = second(d,f) +%SECOND Seconds of date or time. +% S = SECOND(D) returns the seconds given a serial date number or a +% date string, D. +% +% S = SECOND(S,F) returns the second of one or more date strings S using +% format string F. S can be a character array where each +% row corresponds to one date string, or one dimensional cell array of +% strings. +% +% All of the date strings in S must have the same format F, which must be +% composed of date format symbols according to Table 2 in DATESTR help. +% Formats with 'Q' are not accepted. +% +% For example, s = second(728647.558427893) or +% s = second('19-Dec-1994, 13:24:08.17') returns s = 8.17. +% +% See also DATEVEC, MINUTE, HOUR. + +% Copyright 1995-2008 The MathWorks, Inc. +% $Revision: 1.6.2.6 $ $Date: 2008/12/21 01:51:09 $ + +if nargin < 1 + error('finance:second:missingInputs', 'Please enter D.') +end +if nargin < 2 + f = ''; +end + +if ischar(d) + d = datenum(d,f); + sizeD = size(d); + +elseif iscell(d) + sizeD = size(d); + d = datenum(d(:),f); + +elseif isnumeric(d) + sizeD = size(d); + +else + error('finance:second:invalidInputClass', ... + 'Invalid date/time class.') +end + +% Generate date vectors from dates. +c = datevec(d(:)); + +% Extract seconds. Multiply by 1000 then round the result to make sure we avoid +% roundoff errors for milliseconds. 
+%extract seconds from hours +sh = c(:,4)*3600; +% extract second from minutes +sm = c(:,5)*60; +% extract seconds +ss = c(:, 6); +% round +ss = sh + sm + ss; +s = round(1000.*ss)./1000; + +% Reshape into the correct dims +s = reshape(s, sizeD); + + +% [EOF] diff --git a/matlab/unitutils/wavecut.m b/matlab/unitutils/wavecut.m new file mode 100644 index 0000000..24b1349 --- /dev/null +++ b/matlab/unitutils/wavecut.m @@ -0,0 +1,7 @@ +function [wc]=wavecut(wf,startp,endp) + +wc=wf(round(startp):round(endp)); + + + +end \ No newline at end of file diff --git a/python/phaseassoc/catalogConverter.py b/python/phaseassoc/catalogConverter.py new file mode 100644 index 0000000..8ec288b --- /dev/null +++ b/python/phaseassoc/catalogConverter.py @@ -0,0 +1,120 @@ +import pandas as pd + +from obspy.core.event import Catalog +from obspy.core.event import Event +from obspy.core.event import Pick +from obspy.core.event import Origin +from obspy.core.event import OriginQuality + +from obspy import UTCDateTime +from obspy.core.event.base import WaveformStreamID, Comment + + +class CatalogConverter: + """ + Class for converting SeisBench and pyOcto detection results to ObsPy Catalog, which can be saved as QuakeML. 
+ """ + def __init__(self, config, picks, catalog_df, assignments_df, name_of_algorithm): + self._catalog_df = catalog_df + self._assignments_df = assignments_df + self._picks = picks + self._config = config + self._name_of_algorithm = name_of_algorithm + self.catalog = None + + def _convert_pick(self, p): + """ + Function converts picks from SeisBench to ObsPy format + :param p: SeisBench pick + :return: ObsPy pick + """ + pick = Pick() + pick.time = UTCDateTime(p.peak_time.datetime) + pick.waveform_id = WaveformStreamID(network_code=p.trace_id.split(".")[0], + station_code=p.trace_id.split(".")[1], + channel_code=p.trace_id.split(".")[2]) + if p.phase == 'P': + pick.phase_hint = self._config["P_hint"] + elif p.phase == 'S': + pick.phase_hint = self._config["S_hint"] + pick.evaluation_mode = 'automatic' + pick.evaluation_status = 'preliminary' + return pick + + def _convert_origin(self, origin_sb, list_of_picks_sb): + origin = Origin() + origin.time = UTCDateTime(pd.to_datetime(origin_sb.time, unit='s').to_pydatetime()) + origin.latitude = origin_sb.latitude # float + origin.longitude = origin_sb.longitude # float + origin.depth = origin_sb.depth # float in kilometers (SWIP5 origin version) down the see level + origin.depth_type = 'operator assigned' + # TODO: make sure that region is not necessary + # origin.region = self._config["region"] + origin.evaluation_mode = "automatic" + origin.evaluation_status = 'preliminary' + origin.comments.append(Comment(text=f"Localized by: {self._name_of_algorithm}", force_resource_id=False)) + origin.quality = OriginQuality(used_phase_count=len(list_of_picks_sb)) + return origin + + def _convert_event(self, origin_sb, list_of_picks_sb): + """ + Function convert GaMMa detection to ObsPy Event + :param origin_sb: + :param list_of_picks_sb: + :return: + """ + event = Event() + for p in list_of_picks_sb: + pick = self._convert_pick(p) + event.picks.append(pick) + origin = self._convert_origin(origin_sb, list_of_picks_sb) + 
event.origins.append(origin) + return event + + @staticmethod + def _append_pick_trace_id(pick, stream): + """ + Function assigns channel to pick - it is useful for work with SWIP + :param pick: + :param stream: + :return: + """ + channel = stream[0].stats.channel + if pick.phase == "P": + pick.trace_id = pick.trace_id + channel[:-1] + "Z" + if pick.phase == "S": + pick.trace_id = pick.trace_id + channel[:-1] + "E" + return pick + + def catalog2obspy(self): + """ + Function convert GaMMa catalog and SeisBench picks + :return: ObsPy Catalog object + """ + # TODO: make sure that resource id is necessary + #cat = Catalog(resource_id=self._config["resource_id"]) + cat = Catalog() + for j, row in self._catalog_df.iterrows(): + event = self._catalog_df.iloc[j] + event_picks = [self._picks[i] for i in + self._assignments_df[self._assignments_df["event_idx"] == + event["idx"]]["pick_idx"]] + event_obspy = self._convert_event(event, event_picks) + cat.append(event_obspy) + self.catalog = cat + + def save_catalog_to_file(self, file_path): + """ + Save ObsPy catalog to a file. + + Args: + file_path (str): The file path where the catalog will be saved. + + Returns: + None + """ + try: + self.catalog.write(file_path, format="QUAKEML") + print(f"Catalog saved successfully to {file_path}") + except Exception as e: + print(f"Error occurred while saving catalog: {e}") diff --git a/python/seedconverter/inventoryconverter.py b/python/seedconverter/inventoryconverter.py new file mode 100644 index 0000000..c48c6b6 --- /dev/null +++ b/python/seedconverter/inventoryconverter.py @@ -0,0 +1,62 @@ +import os + +import argparse + +from obspy import read_inventory + +def convert_inventory(input_file, + output_format, + output_dir="."): + + """ + Function to read Inventory from provided file and convert it to FDSN Station XML format. 
+ The supported input file formats are: INVENTORYXML, RESP, SC3ML, SEED, STATIONTXT, STATIONXML, XSEED + + :type input_file: str + :param input_file: File name or path to the input inventory file. + :type output_format: str + :param output_format: Format of the output inventory file. + Supported formats: CSS, KML, SACPZ, SHAPEFILE, STATIONTXT, STATIONXML. + :type output_dir: str, optional + :param output_dir: Directory to which output files are written. + Defaults to current directory. + """ + inv = read_inventory(input_file) + result_filename = os.path.splitext(os.path.basename(input_file))[0] + "." + _get_extension(output_format) + inv.write(output_dir + "/" + result_filename, format=output_format) + return result_filename + +def _get_extension(output_format): + format = output_format.upper() + if format == 'STATIONXML': + return "xml" + elif format == 'STATIONTXT': + return "txt" + elif format == 'SHAPEFILE': + return "shp" + else: + return format.lower() + +def main(): + parser = argparse.ArgumentParser(description="Convert provided inventory file" + " to another inventory format.") + parser.add_argument("input_file", help="Provide inventory file to convert." + "The supported input file formats are: INVENTORYXML, RESP, SC3ML, " + "SEED, STATIONTXT, STATIONXML, XSEED") + parser.add_argument("--output_format", + help="Format of the output inventory file. " + "Supported formats: CSS, KML, SACPZ, SHAPEFILE, STATIONTXT, STATIONXML.", + type=str, default=None, required=True) + parser.add_argument("--output_dir", + help="Directory to which output files are written. 
" + "Defaults to current directory.", + type=str, default=".", required=False) + args = parser.parse_args() + + filename = convert_inventory(**vars(args)) + print('Created file:') + print(filename) + return + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/python/seedconverter/sacutil.py b/python/seedconverter/sacutil.py new file mode 100644 index 0000000..57dabad --- /dev/null +++ b/python/seedconverter/sacutil.py @@ -0,0 +1,98 @@ +from obspy.io.sac import SACTrace + + +def to_sac_trace(tr, + unit_type, + remove_response, + inv): + """ + Function converting the specified trace to SAC-specific format + + :type tr: obspy.core.trace.Trace + :param tr: Trace to be converted + :type unit_type: str + :param unit_type: Unit type of the result + :type remove_response: bool + :param remove_response: Information whether the response was removed, used to determine scale + :type inv: obspy.core.inventory.inventory.Inventory + :param inv: Inventory read either from SEED file or Station XML + :rtype: obspy.core.trace.Trace + :return: Converted trace + + """ + + # Assure that tr has fields present in SAC-type trace + sac = SACTrace.from_obspy_trace(tr) + sac = _prepare_sac_for_writing(sac, remove_response, unit_type, inv, tr.id, tr.stats['starttime']) + return sac.to_obspy_trace() + + +def _prepare_sac_for_writing(sac, remove_response, unit_type, inv, tr_id, tr_starttime): + """ + Method that adds some additional information to ordinary SACTrace file. + It adds location info and information about unit type and scale. + + When the response is removed, it scales down the data to nano units + nm/s or so. 
+ + :type sac: obspy.io.sac.SACTrace + :param sac: SACTrace object, might be converted out of ordinary trace + :type remove_response: bool + :param remove_response: Parameter that lets user decide whether the + response is going to be removed or not + :type unit_type: str + :param unit_type: Unit type determined by _get_unit_type + :type inv: obspy.core.inventory.inventory.Inventory + :param inv: inventory contents + :type tr_id: str + :param tr_id: Trace's id string + :type tr_starttime: UTCDateTime + :param tr_starttime: Trace's start time + :rtype: obspy.io.sac.SACTrace + :return: SACTrace object with added additional information + """ + # Copy details of station's location + + sac.idep = _get_sac_idep_unit(unit_type) + if inv: + try: + coords = inv.get_coordinates(tr_id, tr_starttime) + sensitivity = inv.get_response(tr_id, tr_starttime).instrument_sensitivity.value + sac.stla = coords["latitude"] + sac.stlo = coords["longitude"] + sac.stel = coords["elevation"] + sac.user2 = sensitivity + + if remove_response: + # Divide by nano multiplier to get a data with removed response + # scaled to nano unit (nm/s or so). + sac.data = sac.data / _get_nano_multiplier() + sac.scale = -12345 + else: + sac.scale = (1 / sensitivity / _get_nano_multiplier()) + except Exception as err: + raise Exception("Cannot read location and sensitivity information: " + str(err)) + else: + sac.scale = -12345 + return sac + + +def _get_nano_multiplier(): + """ + Returns a multiplier to obtain nano value from a full value. 
+ + :rtype: float + :return: Multiplier value + """ + return 10**(-9) + + +def _get_sac_idep_unit(unit_abbrev): + if unit_abbrev == "DISP": + return "idisp" + elif unit_abbrev == "VEL": + return "ivel" + elif unit_abbrev == "ACC": + return "iacc" + else: + return "iunkn" diff --git a/python/seedconverter/seedconverter.py b/python/seedconverter/seedconverter.py new file mode 100644 index 0000000..4feff78 --- /dev/null +++ b/python/seedconverter/seedconverter.py @@ -0,0 +1,327 @@ +import sys +import os +import warnings + +import obspy +import argparse + +import unit +import sacutil + +import numpy as np + +from obspy.io.xseed import Parser +from obspy.io.xseed.utils import SEEDParserException +from obspy.core.util import AttribDict +from obspy.core import Stream + + +def convert_seed(seed_file, + inv_file=None, + input_unit=None, + output_filetype=None, + remove_response=False, + zero_mean=False, + pre_filtering=None, + output_unit=None, + single_output_file=False, + output_dir="."): + """ + Function to convert provided file to SAC, ASCII or MSEED + + + :type seed_file: str + :param seed_file: File name or path to the seed (seed or msd) file. + :type inv_file: str + :param inv_file: File name or path to inventory file in any inventory format supported by ObsPy. + :type input_unit: str + :param input_unit: Unit of the input file: 'ACC', 'DISP' or 'VEL'. Specified only for miniSEED files (full SEED + files are always in 'COUNTS'). If the value is not set, no unit ('COUNTS') is assumed. + :type output_filetype: str + :param output_filetype: Output filetype. Choose either SAC, MSEED, SLIST or INTERNAL_ASCII. + When used SLIST, sample values are saved with `%+.10e` formatting. + INTERNAL_ASCII is an internal format used for some apps, that uses only one column of values + This will be default formatting since ObsPy 1.1.0 + :type remove_response: bool + :param remove_response: Response removal from the waveform + Defaults to False. 
def convert_seed(seed_file,
                 inv_file=None,
                 input_unit=None,
                 output_filetype=None,
                 remove_response=False,
                 zero_mean=False,
                 pre_filtering=None,
                 output_unit=None,
                 single_output_file=False,
                 output_dir="."):
    """
    Convert the provided SEED/miniSEED file to SAC, ASCII or MSEED.

    :type seed_file: str
    :param seed_file: File name or path to the seed (seed or msd) file.
    :type inv_file: str
    :param inv_file: File name or path to an inventory file in any format
        supported by ObsPy. Mandatory when remove_response is True.
    :type input_unit: str
    :param input_unit: Unit of the input file: 'ACC', 'DISP' or 'VEL'.
        Only meaningful for miniSEED files (full SEED files are always in
        'COUNTS'). When unset, 'COUNTS' is assumed.
    :type output_filetype: str
    :param output_filetype: Output filetype: SAC, MSEED, SLIST or
        INTERNAL_ASCII. SLIST sample values are saved with `%+.10e`
        formatting; INTERNAL_ASCII is an internal single-column format.
    :type remove_response: bool
    :param remove_response: Remove the instrument response from the waveform
        using obspy.core.stream.Stream.remove_response. When True, the
        zero_mean, pre_filtering and output_unit parameters apply.
        Defaults to False.
    :type zero_mean: bool, optional
    :param zero_mean: If True, the mean of the data is subtracted before
        deconvolution.
    :type pre_filtering: (float, float, float, float), optional
    :param pre_filtering: Corner frequencies (f1, f2, f3, f4) of the cosine
        taper applied before deconvolution. Defaults to a band derived from
        the sampling rate.
    :type output_unit: str, optional
    :param output_unit: Unit of the output waveform: 'DISP', 'VEL', 'ACC' or
        'auto'. With 'auto' the unit is determined from the inventory or the
        SEED volume via the channel code instrument type (see Appendix A,
        SEED Manual v2.4). Required when remove_response is True.
    :type single_output_file: bool
    :param single_output_file: If True, write all traces to a single file
        (available only for MSEED); otherwise one file per trace named
        ``network.station.location.channel.starttime_microsecond.extension``.
    :type output_dir: str, optional
    :param output_dir: Directory to which output files are written.
        Defaults to the current directory.

    :return: List of filenames of the created files.
    :raises ValueError: for inconsistent parameter combinations.

    .. note::
        For details of the removal parameters see ObsPy's documentation for
        ``obspy.core.stream.Stream.remove_response``.
    """
    st = obspy.read(seed_file)
    if inv_file:
        inv = obspy.read_inventory(inv_file)
    elif remove_response:
        raise ValueError("Cannot remove response if inventory is not specified")
    else:
        inv = None

    # input_type must always be bound: unit detection below may fail with only
    # a warning, and leaving it unbound caused a NameError later when
    # output_unit == "auto" (miniSEED input without an inventory).
    input_type = None
    if output_unit:
        if inv:
            # assuming all the traces have the same unit
            input_type = unit.determine_instrument_type_from_inventory(inv, st[0].id, st[0].stats.starttime)
        else:
            try:
                # without an inventory, a full SEED file may still carry the unit
                resp = Parser(seed_file)
                # assuming all the traces have the same unit
                input_type = unit.determine_instrument_type_from_blockette(resp, st[0].id)
            except (OSError, SEEDParserException):
                warnings.warn("The provided file is not a SEED volume - cannot determine unit automatically")

        if output_unit == "auto":
            if input_type is None:
                raise ValueError("output_unit='auto' was requested but the unit "
                                 "could not be determined automatically; "
                                 "provide DISP, VEL or ACC explicitly")
            output_unit = input_type

    if remove_response and output_unit is None:
        raise ValueError("You have to provide output_unit parameter"
                         " for response removal procedure.\n"
                         "Available output_unit values are DISP, "
                         "VEL, ACC. Provide one or skip removing "
                         "response by passing "
                         "remove_response=False parameter")

    if remove_response:
        if pre_filtering is None:
            # Default pre-filter: band-limit deconvolution to the usable band.
            tr_sampling_rate = st[0].stats.sampling_rate
            pre_filt = [0.005, 0.006, 0.9 * 0.5 * tr_sampling_rate, 0.5 * tr_sampling_rate]
        else:
            pre_filt = pre_filtering
        try:
            _remove_response(st, inv, input_type, output_unit, pre_filt, zero_mean)
        except (ValueError, SEEDParserException) as exc:
            raise Exception("There was a problem with removing response from the file: " + str(exc)) from exc
    elif output_unit and input_unit and input_unit != output_unit:
        _convert_unit(st, input_unit, output_unit)
    # We are not converting anything, but the input file already had a unit
    # (may happen only with mseed files), so propagate it to the output.
    elif input_unit:
        output_unit = input_unit

    if single_output_file:
        if output_filetype != "MSEED":
            raise ValueError("Writing all traces to one file is available only for conversion to MSEED format")
        return _convert_to_single_mseed(seed_file, st, output_dir)
    else:
        return _convert_to_separate_files(st, inv, remove_response, output_unit, output_filetype, output_dir)
one of the units +def _convert_unit(stream, input_unit, output_unit): + if input_unit == "DISP": + if output_unit == "ACC": # DIFF x2 + stream.differentiate(method="gradient") + stream.differentiate(method="gradient") + elif output_unit == "VEL": # DIFF + stream.differentiate(method="gradient") + elif input_unit == "VEL": + if output_unit == "ACC": # DIFF + stream.differentiate(method="gradient") + elif output_unit == "DISP": # INTEG + FILTER + stream.integrate(method="cumtrapz") + stream.filter("highpass", freq=1.0) + elif input_unit == "ACC": + if output_unit == "VEL": # INTEG + FILTER + stream.integrate(method="cumtrapz") + stream.filter("highpass", freq=1.0) + elif output_unit == "DISP": # (INTEG + FILTER) x2 + stream.integrate(method="cumtrapz") + stream.filter("highpass", freq=1.0) + stream.integrate(method="cumtrapz") + stream.filter("highpass", freq=0.5) + else: + raise TypeError("Cannot convert from ", input_unit) + + +def _convert_to_single_mseed(seed_filename, stream, output_dir): + result_filename = os.path.splitext(os.path.basename(seed_filename))[0] + ".msd" + stream.write(output_dir + "/" + result_filename, format="MSEED") + return [result_filename] + + +def _convert_to_separate_files(stream, inv, remove_response, output_unit, output_filetype, output_dir): + output_stream = Stream() + output_file_extension = output_filetype + + for tr in stream: + if output_filetype == "SAC": + output_file_extension = "sac" + tr = sacutil.to_sac_trace(tr, output_unit, remove_response, inv) + + elif output_filetype in ["SLIST", "TSPAIR", "SH_ASC"]: + output_file_extension = output_filetype.lower() + ".ascii" + slist_unit = unit.translate_instrument_type_to_unit(output_unit) if output_unit else "COUNTS" + tr.stats.ascii = AttribDict({"unit": slist_unit}) + + elif output_filetype == "MSEED": + output_file_extension = "msd" + + elif output_filetype == "INTERNAL_ASCII": + output_file_extension = "ascii" + + output_stream.append(tr) + + return 
_write_separate_output_files(output_stream, output_filetype, output_file_extension, output_dir) + + +def _write_separate_output_files(stream, output_filetype, file_extension, output_dir): + """ + Writes all Trace objects present in Stream to separate files. Filetype + is set according to User's will. + + :type stream: obspy.core.stream.Stream + :param stream: Obspy stream with traces to save separately. + :type output_filetype: str + :param output_filetype: Contains filetype to save. Currently supported + ``SAC``, ``SLIST`` and ``MSEED``. + """ + result_filenames = list() + for tr in stream: + result_filename = ".".join([tr.id, + str(tr.stats.starttime.microsecond), + file_extension]) + result_filepath = output_dir + "/" + result_filename + if output_filetype == "INTERNAL_ASCII": + _write_ascii_one_column(tr, result_filepath) + else: + tr.write(result_filepath, format=output_filetype) + result_filenames.append(result_filename) + return result_filenames + + +def _write_ascii_one_column(trace, filename): + """ + Writes trace data into an ASCII file in one-column format (i.e. no header, single column of values). 
+ """ + with open(filename, 'wb') as fh: + data = trace.data.reshape((-1, 1)) + dtype = data.dtype.name + fmt = '%f' + if dtype.startswith('int'): + fmt = '%d' + elif dtype.startswith('float64'): + fmt = '%+.16e' + np.savetxt(fh, data, delimiter=b'\t', fmt=fmt.encode('ascii', 'strict')) + + +def main(argv): + def str2bool(v): + if v.lower() in ("True", "TRUE", "yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("False", "FALSE", "no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError("Boolean value expected.") + + parser = argparse.ArgumentParser(description="Convert provided file" + " to SAC or SLIST (ASCII).") + + parser.add_argument("seed_file", help="Provide SEED or mSEED file to convert") + parser.add_argument("--inv_file", help="Provide inventory file") + parser.add_argument("--input_unit", + help="Provide input unit. " + "ACC, VEL or DISP are available.", + type=str, default=None, required=False) + parser.add_argument("--output_filetype", + help="Provide output filetype. " + "MSEED, SAC, SLIST and INTERNAL_ASCII are available.", + type=str, default=None, required=True) + parser.add_argument("--remove_response", + help="Remove instrument's response from the waveform", + type=str2bool, default=False) + parser.add_argument("--zero_mean", + help="If true the mean of the data is subtracted", + type=str2bool, default=False, required=False) + parser.add_argument("--pre_filtering", + help="Apply a bandpass filter to the data trace " + "before deconvolution", + type=float, nargs="+", default=None, + required=False) + parser.add_argument("--output_unit", + help="Unit to which waveform should be converted " + "during response removal. When set to auto, " + "the unit will be automatically determined with " + "use of the second letter of channel code which " + "determines the instrument type." 
+ "For details see Appendix A, SEED Manual v2.4.", + type=str, default=None, required=False) + parser.add_argument("--single_output_file", + help="Flag deciding whether all the traces will be written to " + "a single file, if set to False, each trace will be " + "written to a separate file. True is available only " + "for conversion to MSEED", + type=str2bool, default=False, required=False) + parser.add_argument("--output_dir", + help="Directory to which output files are written. " + "Defaults to current directory.", + type=str, default=".", required=False) + args = parser.parse_args() + + filenames = convert_seed(**vars(args)) + print('Created files:') + print(', '.join(filenames)) + return + + +if __name__ == "__main__": + main(sys.argv) diff --git a/python/seedconverter/unit.py b/python/seedconverter/unit.py new file mode 100644 index 0000000..c38ce96 --- /dev/null +++ b/python/seedconverter/unit.py @@ -0,0 +1,95 @@ +def determine_instrument_type_from_blockette(parser, channel_id): + """ + Determines type of instrument used to record a channel with provided + channel_id. + + :type parser: obspy.io.xseed.parser.Parser + :param parser: Parser object parsed from Full SEED or dataless or + StationXML. + :type channel_id: str + :param channel_id: Channel_id object generated by obspy.trace.Trace.id + :rtype: str + :return: Returns str determining the type of instrument + """ + return translate_unit_to_instrument_type(determine_unit_from_blockette(parser, channel_id)) + + +def determine_unit_from_blockette(parser, channel_id): + """ + Determines the unit used to record a channel with provided + channel_id. + + :type parser: obspy.io.xseed.parser.Parser + :param parser: Parser object parsed from Full SEED or dataless or + StationXML. 
def determine_unit_from_blockette(parser, channel_id):
    """
    Determine the unit used to record the channel with the provided
    channel_id.

    :type parser: obspy.io.xseed.parser.Parser
    :param parser: Parser object parsed from Full SEED or dataless or
        StationXML.
    :type channel_id: str
    :param channel_id: Channel id as produced by obspy.trace.Trace.id
    :rtype: str or None
    :return: Unit name that can be passed to
        translate_unit_to_instrument_type to obtain a string compatible
        with the response removal procedure, or None when no channel
        blockette (52) with a matching unit abbreviation (blockette 34)
        is found. Callers should handle the None case: passing it on to
        translate_unit_to_instrument_type raises TypeError.
    """
    for blkt in parser._select(channel_id):
        # Only channel-identifier blockettes (52) carry the signal unit code.
        if blkt.id != 52:
            continue
        # Resolve the unit code against the abbreviation dictionary (34).
        for abbreviation in parser.blockettes[34]:
            if abbreviation.unit_lookup_code == blkt.units_of_signal_response:
                return abbreviation.unit_name
    # Explicitly document the "not found" outcome instead of falling off the end.
    return None
def translate_unit_to_instrument_type(unit_name):
    """
    Translate a SEED unit name into the instrument type string used by the
    response removal procedure.

    :param unit_name: Unit name, e.g. "M", "M/S" or "M/S**2".
    :return: "DISP", "VEL" or "ACC".
    :raises TypeError: for any unit name outside the three known ones.
    """
    unit_to_type = {
        "M": "DISP",
        "M/S": "VEL",
        "M/S**2": "ACC",
    }
    instrument_type = unit_to_type.get(unit_name)
    if instrument_type is None:
        raise TypeError("Unknown unit code ", unit_name)
    return instrument_type