SERA Toolbox1 and Toolbox2 standalone versions

This commit is contained in:
2019-07-05 10:31:31 +02:00
parent 6f5c52a565
commit 758751a7b0
71 changed files with 54733 additions and 3 deletions

View File

@@ -0,0 +1,292 @@
% PROGRAM: Clustering
% VERSION: [Interactive Standalone Version] V1.8
% COMPATIBLE with Matlab version 2017b or later
% TOOLBOX: "Clustering/Transformation to ED Toolbox" within SERA Project
% DOCUMENT: "READ_ME_App_1B_v1_Description_Cluster_Analysis.docx"
% -------------------------------------------------------------------------------------------------------
% The function gathers several different clustering algorithms included in
% MATLAB in order to perform Cluster Analysis for datasets transformed
% to Equivalent Dimensions
% --------------------------------------------------------------------------------------------------------
% INPUT:
% THE PROGRAM USES AS INPUT THE OUTPUT DATA OBTAINED
% AFTER RUNNING THE "ED_ToolBox_Wrapper.mat" PROGRAM
% --------------------------------------------------------------------------------------------------------
% OVERVIEW: This Application is a Matlab function which takes as input
% the output file created after "ED_ToolBox_Wrapper.mat", therefore all
% analyses are performed within the Equivalent Dimension phase space.
% Output results (matlab structures/variables) are produced as well as
% some figures in some particular cases.
% --------------------------------------------------------------------------------------------------------
% AUTHORS: K. Leptokaropoulos,
% last updated: 03/2019, within SERA PROJECT, EU Horizon 2020 R&I
% programme under grant agreement No.730900
% CURRENT VERSION: v1.8 **** [INTERACTIVE STANDALONE VERSION!!]
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% PLEASE refer to the accompanying document:
% "READ_ME_App_1B_v1_Description_Cluster_Analysis.docx"
% for description of the Application and its requirements.
% --------------------------------------------------------------------------------------------------------
% DESCRIPTION: The function is actually a compilation of existing and well-
% known clustering algorithms available within the MATLAB
% libraries, therefore the corresponding functions, descriptions,
% information and references can be retrieved from the Matlab
% help. The functions used are "kmeans", "linkage", "cluster" &
% "fcm".
% NOTE: Working in the Equivalent Dimension phase space,
% leads to the usage of Euclidean distance metric, therefore all
% other available metrics are disregarded within this Function.
% --------------------------------------------------------------------------------------------------------
% INPUT: The function takes as input the output file generated after running
% "ED_ToolBox_Wrapper.mat", therefore all analyses are performed
% within the Equivalent Dimension phase space:
% --- Tdata: is the output of "ED_ToolBox_Wrapper.mat" function,
% corresponding to the dataset with parameters (Seismic/
% Production) after they are transformed to ED space.
% --- vectors: The User is requested to specify the columns from "Tdata"
% structure, to be used in the Cluster Analysis
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% The User is then requested to enter values for some additional parameters.
% Input Parameters Overview:
% --- N: number of clusters to be constructed after the analysis
% (default: 4, however, this is a completely arbitrary selection)
% --- Meth: Clustering Algorithm Selection.
% Possible arguments: 'Partitioning','Hierarchical' and 'Fuzzy'
% --- CTMeth: Cluster Tree method
% Possible arguments: 'average','centroid','complete','median',
% 'single','ward', and 'weighted'
% NOTE: CTMeth is only applicable for Meth='Hierarchical'
% --- Lnodes: number of leaf nodes for plotting the Ward Diagram
% (This applies only in the visualization option)
% ---------------------------------------------------------------------------------------------------------
% OUTPUTS: <> Cluster: Structure, with a size corresponding to the number
% of clusters set by the user. It consists of the 3 following fields
% - Cluster.id --> field with the parameters for each cluster
% - Cluster.index --> field with index of the events comprising the
% clusters for reference to the input data.
% - Cluster.Center --> Center of mass of each Cluster
% --------------------------------------------------------------------------------------------------------
% <> ClusterColumns --> string array with transformed parameters
% corresponding to the columns of Cluster.id
% <> *** L --> Array (double) used for visualization of the Dendrogram
% (valid only when Meth='Hierarchical')
% --------------------------------------------------------------------------------------------------------
% <> FIGURES:
% - Cluster_output.jpg: Only valid for 2D and 3D cases
% - Dendrogram_output.jpg: Ward diagram, only valid when Meth='Hierarchical' is selected
% --------------------------------------------------------------------------------------------------------
% <> Output ASCII Files: For convenience of the User, three additional ASCII
% files are produced with the main results of the Application:
% - Clusters_Original.txt : File containing the Original parameters of Seismic and
% Production data selected for the Analysis. Each
% column corresponds to the values of one parameter,
% which is defined in "Clusters_Fields.txt" output file. THE
% LAST COLUMN in the file, is an integer corresponding
% to the number of cluster that the event belongs to.
% NOTE!!!! that the file contains FULL (original) SAMPLE,
% therefore NANS may be included.
% - Clusters_Transformed.txt : File containing Transformed parameters of Seismic
% and Production data derived by the Analysis. Each
% column corresponds to the values of one parameter,
% which is defined in "Clusters_Fields.txt" output file.
% NOTE!!! that the file contains only the values that
% were transformed (non-NANs)
% - Clusters_Fields.txt : File containing the labels (Fields) of each column of the
% previously described output (ASCII) files.
% ---------------------------------------------------------------------------------------------------------
% LICENSE
% This is free software: you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the
% Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty
% of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU General Public License for more details.
% ---------------------------------------------------------------------------------------------------------
% ========================== MAIN SCRIPT ==========================
% Interactive workflow:
%   1. let the user pick a transformed dataset and the fields to cluster,
%   2. drop every event (row) that has a NaN in any selected field,
%   3. run the selected clustering algorithm (kmeans / linkage+cluster / fcm),
%   4. store the results as .mat and ASCII files in "Outputs_Clustering",
%   5. plot the clusters (2-D / 3-D selections) and, for the hierarchical
%      method, the dendrogram.
clear; clc;
close all

% Output folder; built with fullfile everywhere below so that the script
% does not depend on the Windows-only "\" path separator.
outDir='Outputs_Clustering';
if ~exist(outDir,'dir'); mkdir(outDir); end

% ------- Select input data file & parameters to be clustered -------
% DatLoad loads the chosen file by its bare name, so it must be called
% from inside the data directory. The starting directory is restored even
% when the selection / loading fails.
startDir=pwd;
cd Transformed_Data
try
    d=dir;
    d=d(~[d.isdir]);            % '.' , '..' and sub-folders cannot be loaded
    dstr={d.name};
    [s,s1]=DatLoad(dstr);
    load(dstr{s});              % expected to bring "Tdata" into the workspace
catch loadErr
    cd(startDir);
    rethrow(loadErr);
end
cd(startDir);

% ------- Prepare data and eliminate NaNs -------
% One column per selected field.
% NOTE(review): assumes Tdata(k).all is a column vector of equal length
% for every field - confirm against ED_ToolBox_Wrapper output.
nSel=numel(s1);
ClusterColumns=cell(1,nSel);
a1=[];
for i=1:nSel
    a1=[a1,Tdata(s1(i)).all]; %#ok<AGROW>
    ClusterColumns{i}=Tdata(s1(i)).field;
end
% Keep only the rows without NaN. "indx" holds, for every kept row, its
% row number in the full sample so that clusters can be referred back to
% the input data. (Row-wise test with size-independent logic: no reliance
% on length(), on prod() propagating NaN, or on a fixed-size index buffer.)
valid=~any(isnan(a1),2);
a=a1(valid,:);
indx=find(valid)';
cou=numel(indx);

% ------- Set the number of clusters -------
N=input('Please set the number of clusters: ');
if isempty(N); N=4; end         % documented default when the user just presses Enter
if ~isnumeric(N) || ~isscalar(N) || ~isfinite(N) || N<1 || N~=round(N)
    error('The number of clusters must be a positive integer.');
end
if cou<=N;error(['The number of events (',num2str(cou),') is smaller than the selected number of clusters (',num2str(N),')']);end

% ------- Select Clustering Method --------
% 'Partitioning' - 1, 'Hierarchical' - 2, 'Fuzzy' - 3
[Meth,ok]=listdlg('PromptString','Select Clustering Algorithm:',...
    'ListString',{'Partitioning','Hierarchical','Fuzzy'},'ListSize',[160,100],'SelectionMode','single');
if ~ok; error('No clustering algorithm was selected.'); end

switch Meth
    case 1 % 'Partitioning'
        % Only the (squared) Euclidean metric is offered because the
        % analysis is carried out in the ED phase space.
        [c,centloc,sd]=kmeans(a,N,'Distance','sqeuclidean');
    case 2 % 'Hierarchical'
        % Method for the computation of the cluster tree
        CTCmeth={'average','centroid','complete','median','single','ward','weighted'};
        [CMeth,ok]=listdlg('PromptString','Select Method for Cluster Tree Computation:',...
            'ListString',CTCmeth,'ListSize',[160,100],'SelectionMode','single');
        if ~ok; error('No cluster tree method was selected.'); end
        % Brace indexing: linkage expects the method name as text, not as
        % a 1x1 cell. Euclidean metric only (ED phase space).
        L=linkage(a,CTCmeth{CMeth},'euclidean');
        c=cluster(L,N);
    case 3 % 'Fuzzy'
        % u(i,k) is the membership grade of event k in cluster i; every
        % event is assigned to the cluster with the highest grade.
        [cen,u] = fcm(a,N);
        maxu=max(u,[],1);       % explicit dimension: per-event maximum
        c=zeros(cou,1);
        for i=1:N
            c(u(i,:)==maxu)=i;
        end
end

% ------- Extract clusters -------
for j=1:N
    members=(c==j);
    Cluster(j).id=a(members,:);
    Cluster(j).index=indx(members);
    % mean along dimension 1 also gives the right answer for a cluster
    % that consists of a single event (one row).
    Cluster(j).Center=mean(Cluster(j).id,1);
end

%% Create output ASCII tables
% Transformed values + cluster number in the last column
a(:,size(a,2)+1)=c;
% Original values (full sample, NaNs included) + cluster number in the
% last column; events that were not clustered keep the value 0 there.
% NOTE(review): assumes origval_all has one entry per row of Tdata(k).all.
b=zeros(numel(Tdata(s1(1)).origval_all),nSel);
for j=1:nSel;b(:,j)=Tdata(s1(j)).origval_all;end
nb=size(b,2)+1;
b(:,nb)=0;
b(indx,nb)=c;

%% Store Outputs
save(fullfile(outDir,'Cluster.mat'),'Cluster')
save(fullfile(outDir,'ClusterColumns.mat'),'ClusterColumns')
fid1=fopen(fullfile(outDir,'Clusters_Transformed.txt'),'w');
fid2=fopen(fullfile(outDir,'Clusters_Original.txt'),'w');
fid3=fopen(fullfile(outDir,'Clusters_Fields.txt'),'w');
fids=[fid1,fid2,fid3];
if any(fids<0)
    % close whatever was opened before reporting the failure
    for k=fids(fids>=0); fclose(k); end
    error(['Unable to open the output files in folder ',outDir]);
end
% The matrices are transposed because fprintf consumes data column-wise.
fprintf(fid1,[repmat('%32.16f ',[1,size(a,2)-1]),' %d \n'],a');
fprintf(fid2,[repmat('%32.16f ',[1,size(b,2)-1]),' %d \n'],b');
for i=1:size(ClusterColumns,2)
    fprintf(fid3,'%s ',ClusterColumns{i});
end
% Close only the files opened here (not every file open in the session).
fclose(fid1);
fclose(fid2);
fclose(fid3);

%% Plotting example (only for 2 or 3 selected parameters)
if length(s1)==2
    for j=1:N
        plot(Cluster(j).id(:,1),Cluster(j).id(:,2),'o');hold on;axis square
        plot(Cluster(j).Center(1),Cluster(j).Center(2),'kx','MarkerSize',16,'LineWidth',2)
    end
    xlabel({Tdata(s1(1)).field},'FontSize',14,'Interpreter','none')
    ylabel({Tdata(s1(2)).field},'FontSize',14,'Interpreter','none')
    saveas(gcf,fullfile(outDir,'Cluster_output.jpg'));
elseif length(s1)==3
    for j=1:N
        plot3(Cluster(j).id(:,1),Cluster(j).id(:,2),Cluster(j).id(:,3),'o');hold on;grid on;axis square
        plot3(Cluster(j).Center(1),Cluster(j).Center(2),Cluster(j).Center(3),'kx','MarkerSize',16,'LineWidth',2)
    end
    xlabel({Tdata(s1(1)).field},'FontSize',14,'Interpreter','none')
    ylabel({Tdata(s1(2)).field},'FontSize',14,'Interpreter','none')
    zlabel({Tdata(s1(3)).field},'FontSize',14,'Interpreter','none')
    saveas(gcf,fullfile(outDir,'Cluster_output.jpg'));
end

%% Dendrogram (hierarchical clustering only)
if Meth==2
    Lnodes=dialog1('number of leaf nodes',{num2str(size(c,1))});
    % Colour threshold = height of the link at row (number of links - N + 2)
    % of the linkage matrix. size(L,1) is used instead of length(L), which
    % returns 3 (the column count) for trees with fewer than 3 links. For
    % N==1 that row does not exist, so the default colouring is used.
    nLinks=size(L,1);
    thrRow=nLinks-N+2;
    figure;
    if thrRow>=1 && thrRow<=nLinks
        dendrogram(L,Lnodes,'ColorThreshold', L(thrRow,3));
    else
        dendrogram(L,Lnodes);
    end
    saveas(gcf,fullfile(outDir,'Dendrogram_output.jpg'));
end
% *************************** FUNCTIONS ***************************
% ****-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-****
%% --------------------------------------------------------------------------------------
%Select Seismic Catalog and the parameters the user wishes to study
function [s,s1]=DatLoad(dstr)
% DATLOAD Let the user choose a transformed dataset and the fields to analyse.
%
%   [s,s1]=DatLoad(dstr)
%
%   INPUT
%     dstr - cell array with the names of the candidate data files; the
%            selected file is loaded by that name, so it must be reachable
%            from the current folder (or be a full path).
%   OUTPUT
%     s    - index (into dstr) of the file selected by the user
%     s1   - indices of the selected elements of the "Tdata" structure
%            array stored in that file (one or more)
%
%   An error is raised when a dialog is cancelled or when the selected
%   file does not contain a "Tdata" variable.

% Select the transformed dataset (single selection)
[s,ok]=listdlg('PromptString','Select Transformed Dataset:',...
    'SelectionMode','single',...
    'ListString',dstr);
if ~ok
    error('DatLoad:noDataset','No transformed dataset was selected.');
end

% Load into a structure instead of "poofing" variables into the function
% workspace, and verify that the expected variable is really there.
contents=load(dstr{s});
if ~isstruct(contents) || ~isfield(contents,'Tdata')
    error('DatLoad:noTdata','File %s does not contain a "Tdata" variable.',dstr{s});
end
Tdata=contents.Tdata;

% Select the parameters (fields) to be used in the analysis
[s1,ok]=listdlg('PromptString','Select field(s):',...
    'ListString',{Tdata.field});
if ~ok
    error('DatLoad:noFields','No field was selected.');
end
end
%%
function [ou]=dialog1(name,defaultanswer)
% DIALOG1 Ask the user for a single numeric value through an input dialog.
%
%   ou=dialog1(name,defaultanswer)
%
%   INPUT
%     name          - text describing the requested quantity; used both in
%                     the prompt and as the dialog title
%     defaultanswer - 1x1 cell with the default answer as text,
%                     e.g. {'30'}
%   OUTPUT
%     ou            - the entered value converted to double. When the
%                     dialog is cancelled, or the entry is not a valid
%                     number, the default answer is used instead (NaN if
%                     the default is not numeric either).

prompt={['\fontsize{12} Please enter ',name, ':']};
numlines=1;
opts.Interpreter='tex';          % needed for the \fontsize markup in the prompt
answer=inputdlg(prompt,name,numlines,defaultanswer,opts);

% inputdlg returns an empty cell when the user cancels the dialog
if isempty(answer)
    answer=defaultanswer;
end

% str2double only parses numbers; unlike str2num it never evaluates the
% text typed by the user as MATLAB code.
ou=str2double(answer{1});
if isnan(ou)
    ou=str2double(defaultanswer{1});
end
end