SERA Toolbox1 and Toolbox2 standalone versions

This commit is contained in:
2019-07-05 10:31:31 +02:00
parent 6f5c52a565
commit 758751a7b0
71 changed files with 54733 additions and 3 deletions

View File

@@ -0,0 +1,200 @@
% FUNCTION: Clustering
% VERSION: [Wrapper Standalone Version] V2.8
% COMPATIBLE with Matlab version 2017b or later
% TOOLBOX: "Clustering/Transformation to ED Toolbox" within SERA Project
% DOCUMENT: "READ_ME_App_1B_v2_Description_Cluster_Analysis.docx"
% ----------------------------------------------------------------------------------------------------------------
% The function gathers several different clustering algorithms included in MATLAB in order
% to perform Cluster Analysis for datasets transformed to Equivalent Dimensions
% --------------------------------------------------------------------------------------------------------
% OVERVIEW: This Application is a Matlab function which takes as input
% the output file created after "ED_ToolBox.mat", therefore all analyses are
% performed in the Equivalent Dimension phase space. Please check also the
% accompanying auxiliary scripts 'Clustering_wrapper' and 'Plot_Clustering'
% for a specific application (scenario) & plotting results.
% --------------------------------------------------------------------------------------------------------
% AUTHORS: K. Leptokaropoulos,
% last updated: 03/2019, within SERA PROJECT, EU Horizon 2020 R&I
% programme under grant agreement No.730900
% CURRENT VERSION: v2.8 **** [Wrapper Standalone Version]
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% PLEASE refer to the accompanying document:
% "READ_ME_App_1B_v2_Description_Cluster_Analysis.docx"
% for description of the Application and its requirements.
% --------------------------------------------------------------------------------------------------------
% DESCRIPTION: The function is actually a compilation of existing and well-
% known clustering algorithms available within the MATLAB
% libraries, therefore the corresponding functions, descriptions,
% information and references can be retrieved from the Matlab
% help. The functions used are "kmeans", "linkage", "cluster" &
% "fcm".
% NOTE: Working in the Equivalent Dimension phase space,
% leads to the usage of Euclidean distance metric, therefore all
% other available metrics are disregarded within this Function.
% --------------------------------------------------------------------------------------------------------
% INPUT: The function takes as input the output file generated after running
% "ED_ToolBox.mat", therefore all analyses are performed within the
% Equivalent Dimension phase space:
% --- Tdata: is the output of "ED_ToolBox_Wrapper.mat" function,
% corresponding to the dataset with parameters (Seismic/
% Production) after they are transformed to ED space.
% --- vectors: The User is requested to specify the columns from "Tdata"
% structure, to be used in the Cluster Analysis
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% The User is then requested to enter values for some additional parameters.
% Input Parameters Overview:
% --- N: number of clusters to be constructed after the analysis
% (default: 4, however, this is a completely arbitrary selection)
% --- Meth: Clustering Algorithm Selection.
% Possible arguments: 'Partitioning','Hierarchical' and 'Fuzzy'
% --- CTMeth: Cluster Tree method
% Possible arguments: 'average', 'centroid', 'complete', 'median',
% 'single', 'ward', and 'weighted'
% NOTE: CTMeth is only applicable for Meth='Hierarchical'
% --- Lnodes: number of leaf nodes for plotting the Ward Diagram
% (This applies only in the visualization option)
% ---------------------------------------------------------------------------------------------------------
% OUTPUTS: <> Cluster: Structure, with a size corresponding to the number
% of clusters set by the user. It consists of the 3 following fields:
% - Cluster.id --> field with the parameters for each cluster
% - Cluster.index --> field with index of the events comprising the
% clusters for reference to the input data.
% - Cluster.Center --> Center of mass of each Cluster
% --------------------------------------------------------------------------------------------------------
% <> ClusterColumns --> string array with transformed parameters
% corresponding to the columns of Cluster.id
% <> *** L --> Array (double) used for visualization of the Dendrogram
% (valid only when Meth='Hierarchical')
% --------------------------------------------------------------------------------------------------------
% <> FIGURES:
% - Cluster_output.jpg: Only valid for 2D and 3D cases
% - Dendrogram_output.jpg: Ward diagram, only valid when Meth='Hierarchical' is selected
% --------------------------------------------------------------------------------------------------------
% <> Output ASCII Files: For convenience of the User, three additional ASCII
% files are produced with the main results of the Application:
% - Clusters_Original.txt : File containing the Original parameters of Seismic and
% Production data selected for the Analysis. Each
% column corresponds to the values of one parameter,
% which is defined in 'Data_Fields.txt' output file. THE
% LAST COLUMN in the file is an integer corresponding
% to the number of the cluster that the event belongs to.
% NOTE!!!! that the file contains the FULL (original) SAMPLE,
% therefore NaNs may be included.
% - Clusters_Transformed.txt : File containing Transformed parameters of Seismic
% and Production data derived by the Analysis. Each
% column corresponds to the values of one parameter,
% which is defined in 'Data_Fields.txt' output file.
% NOTE!!! that the file contains only the values that
% were transformed (non-NaNs)
% - Clusters_Fields.txt : File containing the labels (Fields) of each column of the
% previously described output (ASCII) files.
% ---------------------------------------------------------------------------------------------------------
% LICENSE
% This is free software: you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the
% Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty
% of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU General Public License for more details.
% ---------------------------------------------------------------------------------------------------------
%%
function [a,Cluster,ClusterColumns,L]=Clustering_V2_8(vectors,N,Meth,CTmeth,Tdata)
% CLUSTERING_V2_8  Cluster selected transformed parameters and export the results.
%
%   [a,Cluster,ClusterColumns,L] = Clustering_V2_8(vectors,N,Meth,CTmeth,Tdata)
%
%   INPUTS
%     vectors : indices into the struct array Tdata selecting the parameters
%               (one column each) that take part in the analysis.
%     N       : number of clusters to build.
%     Meth    : 'Partitioning' (kmeans), 'Hierarchical' (linkage + cluster)
%               or 'Fuzzy' (fcm).
%     CTmeth  : linkage method passed to LINKAGE; only used when
%               Meth is 'Hierarchical'.
%     Tdata   : struct array; the fields read here are .all (transformed
%               values), .field (label) and .origval_all (original values).
%               Each .all / .origval_all is expected to be a column vector
%               with one entry per event.
%
%   OUTPUTS
%     a              : rows of the selected .all columns that contain no NaN,
%                      with the cluster number appended as the last column.
%     Cluster        : 1-by-N struct array with fields
%                        .id     - members of the cluster (rows of the data),
%                        .index  - row numbers of the members in the full
%                                  (unfiltered) input,
%                        .Center - column-wise mean of .id.
%     ClusterColumns : 1-by-numel(vectors) cell array with the .field labels.
%     L              : linkage matrix for 'Hierarchical', [] otherwise.
%
%   SIDE EFFECTS
%     Creates the folder 'Outputs_Clustering' in the current directory (if it
%     does not exist) and writes Cluster.mat, ClusterColumns.mat,
%     Clusters_Transformed.txt, Clusters_Original.txt and Clusters_Fields.txt
%     into it. The current directory is not changed.
%
%   ERRORS
%     Raised when the number of usable events does not exceed N, when Meth is
%     not one of the three supported names, or when an output file cannot be
%     created.

    outDir='Outputs_Clustering';
    % Requesting outputs from MKDIR suppresses the "already exists" warning.
    [ok,msg]=mkdir(outDir);
    if ~ok
        error(['Cannot create output directory ',outDir,': ',msg]);
    end

    % Assemble the input array: one column per selected parameter.
    nvec=numel(vectors);
    ClusterColumns=cell(1,nvec);
    a1=[];
    for i=1:nvec
        a1=[a1,Tdata(vectors(i)).all]; %#ok<AGROW>
        ClusterColumns{i}=Tdata(vectors(i)).field;
    end

    % Keep only events (rows) for which every selected parameter is defined.
    % Row-wise test: independent of whether there are more rows or columns.
    valid=~any(isnan(a1),2);
    indx=find(valid)';          % row vector of original row numbers
    a=a1(valid,:);
    cou=numel(indx);
    if cou<=N;error(['The number of events (',num2str(cou),') is smaller than the selected number of clusters (',num2str(N),')']);end

    % Only the Euclidean metric is used: the data live in the Equivalent
    % Dimension phase space, so other metrics are deliberately not offered.
    switch Meth
        case 'Partitioning'
            c=kmeans(a,N,'Distance','sqeuclidean');
            L=[];
        case 'Hierarchical'
            L=linkage(a,CTmeth,'euclidean');
            % Explicit 'maxclust' form: always cut the tree into N clusters.
            c=cluster(L,'maxclust',N);
        case 'Fuzzy'
            [~,u]=fcm(a,N);
            % Hard assignment: each event goes to the cluster with the
            % highest membership (on an exact tie the highest cluster number
            % wins, because later iterations overwrite earlier ones).
            maxu=max(u,[],1);
            c=zeros(cou,1);
            for i=1:N
                c(u(i,:)==maxu)=i;
            end
            L=[];
        otherwise
            error(['Unknown clustering method ''',char(Meth),...
                '''. Select from ''Partitioning'', ''Hierarchical'' or ''Fuzzy''.']);
    end

    % Extract clusters.
    Cluster=struct('id',cell(1,N),'index',[],'Center',[]);
    for j=1:N
        members=(c==j);
        Cluster(j).id=a(members,:);
        Cluster(j).index=indx(members);
        % Mean along dimension 1 so that a single-member cluster yields that
        % member itself instead of the mean across its coordinates.
        Cluster(j).Center=mean(Cluster(j).id,1);
    end

    % Build the arrays for the ASCII output.
    % Transformed data: append the cluster number as last column.
    a(:,size(a,2)+1)=c;
    % Original data (full sample, NaNs included): last column is the cluster
    % number, 0 for events that were excluded from the analysis.
    for j=1:nvec
        b(:,j)=Tdata(vectors(j)).origval_all; %#ok<AGROW>
    end
    nb=size(b,2)+1;
    b(:,nb)=0;
    for j=1:N
        b(Cluster(j).index,nb)=j;
    end

    % Store outputs (full paths are used; the working directory is untouched).
    save(fullfile(outDir,'Cluster.mat'),'Cluster')
    save(fullfile(outDir,'ClusterColumns.mat'),'ClusterColumns')
    writeClusterTable(fullfile(outDir,'Clusters_Transformed.txt'),a);
    writeClusterTable(fullfile(outDir,'Clusters_Original.txt'),b);
    writeClusterFields(fullfile(outDir,'Clusters_Fields.txt'),ClusterColumns);
end

function writeClusterTable(fname,M)
% Write M row by row: every column but the last as %32.16f, the last one
% (cluster number) as an integer.
    fid=openOutputFile(fname);
    closer=onCleanup(@()fclose(fid)); %#ok<NASGU> closes only this file, even on error
    fprintf(fid,[repmat('%32.16f ',[1,size(M,2)-1]),' %d \n'],M');
end

function writeClusterFields(fname,labels)
% Write the column labels on a single line, separated by blanks.
    fid=openOutputFile(fname);
    closer=onCleanup(@()fclose(fid)); %#ok<NASGU> closes only this file, even on error
    for i=1:size(labels,2)
        fprintf(fid,'%s ',labels{i});
    end
end

function fid=openOutputFile(fname)
% Open fname for writing and fail loudly when that is not possible.
    [fid,msg]=fopen(fname,'w');
    if fid<0
        error(['Cannot open ',fname,' for writing: ',msg]);
    end
end

View File

@@ -0,0 +1,52 @@
% This is a Wrapper Script for performing Cluster Analysis
% [function "Clustering_V2_*"] of the data resulting after
% performing Transformation to Equivalent Dimensions [by
% using "ED_ToolBox_Wrapper" script]. Some comments for
% Cluster Analysis are within 'Clustering_V2_*' code. Here
% the input data & parameters (function arguments) are
% defined by the User. Please modify the script accordingly.
% The lines that can be modified are followed by a comment
% "- PLEASE SET".
% PLEASE REFER ALSO TO APPLICATION DOCUMENTATION:
% "READ_ME_App_1B_v2_Description_Cluster_Analysis.docx"
%
% NOTE: paths are built with FULLFILE and the working directory is never
% changed, so the script runs unmodified on Windows, Linux and macOS and
% does not leave the session in another folder when a step fails.
clc;clear;tic
% STEP 1. DATA Selection. Please Note that the Cluster Analysis concerns data after
%         Transformation to Equivalent Dimensions, therefore the Output *.mat file
%         obtained from "ED_ToolBox_Wrapper" script Application must be used as
%         input data for this Application:
dataDir='Transformed_Data';    % - PLEASE SET - specify the path to Data Directory
dataFile='ST2_Tdata.mat';      % - PLEASE SET - specify the input data filename (MATLAB format: Output of T2ED Application)
load(fullfile(dataDir,dataFile))
% The clustering function needs the structure "Tdata" from the loaded file.
if ~exist('Tdata','var')
    error(['The file ',fullfile(dataDir,dataFile),' does not contain the variable Tdata']);
end
% Choose vector(s) from the Transformed Data Structure
vectors=[4,7,23];              % - PLEASE SET - specify columns (parameters) to be analyzed
% STEP 2. Define the Number of Clusters:
N=4;                           % - PLEASE SET - specify number of clusters to be formed
% STEP 3. Select Clustering Algorithm:
Meth='Hierarchical';           % - PLEASE SET, Select from ['Partitioning', 'Hierarchical', 'Fuzzy']
% STEP 4. Select Method for Cluster Tree (only applicable for Meth=='Hierarchical'):
CTmeth='average';              % - PLEASE SET, Select from ['average','centroid','complete','median','single','ward','weighted']
% STEP 5. Number of leaf nodes
%         (only for PLOTTING the WARD DIAGRAM - 'Hierarchical')
Lnodes=87;                     % - PLEASE SET
% STEP 6. Run Function "Clustering"
[a,Cluster,ClusterColumns,L]=Clustering_V2_8(vectors,N,Meth,CTmeth,Tdata);
% STEP 7. Plotting Results (2D and 3D cases) % PLEASE Comment the next line to disable visualization
plotClustering
% STEP 8. Store Outputs
outDir='Outputs_Clustering';
% Requesting outputs from MKDIR avoids the warning when the folder exists.
[ok,msg]=mkdir(outDir);
if ~ok
    error(['Cannot create output directory ',outDir,': ',msg]);
end
save(fullfile(outDir,'Cluster.mat'),'Cluster')
save(fullfile(outDir,'ClusterColumns.mat'),'ClusterColumns')
toc

View File

@@ -0,0 +1,34 @@
% Plotting script for the results of "Clustering_V2_*".
% It is meant to be run from the wrapper script and reads the following
% variables from the workspace: vectors, N, Cluster, ClusterColumns, Meth,
% L and Lnodes.
%   - 2 or 3 selected parameters: scatter plot of the clusters with their
%     centers, saved as Outputs_Clustering/Cluster_output.jpg
%   - Meth='Hierarchical': dendrogram, saved as
%     Outputs_Clustering/Dendrogram_output.jpg
% Figures are saved through full paths; the working directory is not changed.
close all
outDir='Outputs_Clustering';
% Requesting outputs from MKDIR avoids the warning when the folder exists.
[ok,msg]=mkdir(outDir);
if ~ok
    error(['Cannot create output directory ',outDir,': ',msg]);
end
%% Plotting example
k2=length(vectors);
if k2==2
    for j=1:N
        plot(Cluster(j).id(:,1),Cluster(j).id(:,2),'o');hold on;axis square
        plot(Cluster(j).Center(1),Cluster(j).Center(2),'kx','MarkerSize',16,'LineWidth',2)
    end
elseif k2==3
    for j=1:N
        plot3(Cluster(j).id(:,1),Cluster(j).id(:,2),Cluster(j).id(:,3),'o');hold on;grid on;axis square
        plot3(Cluster(j).Center(1),Cluster(j).Center(2),Cluster(j).Center(3),'kx','MarkerSize',16,'LineWidth',2)
    end
    zlabel(ClusterColumns(3),'FontSize',14,'Interpreter','none')
end
if k2==2 || k2==3
    % Labelling and saving are common to the 2D and 3D cases.
    xlabel(ClusterColumns(1),'FontSize',14,'Interpreter','none')
    ylabel(ClusterColumns(2),'FontSize',14,'Interpreter','none')
    saveas(gcf,fullfile(outDir,'Cluster_output.jpg'));
end
if strcmp(Meth,'Hierarchical')
    figure;
    % Row of the linkage matrix whose height is used as color threshold.
    % size(L,1) is the number of links; length(L) would return 3 (the
    % number of columns) for trees with fewer than three links.
    thrRow=size(L,1)-N+2;
    if thrRow>=1 && thrRow<=size(L,1)
        [H,T,outperm]=dendrogram(L,Lnodes,'ColorThreshold',L(thrRow,3));
    else
        % No such link exists (e.g. N=1): fall back to default coloring.
        [H,T,outperm]=dendrogram(L,Lnodes);
    end
    saveas(gcf,fullfile(outDir,'Dendrogram_output.jpg'));
end